Join CTD and stomach data

‘Tidy datasets are all alike but every messy dataset is messy in its own way’

Author

Max Lindmark

Published

January 15, 2024

Here I will try and join CTD data to trawl data using the new key smhi_serial_no that was recently added to the trawl database. Essentially all surveys (quarter 1 and 4 every year) has a different file format.

Load packages

# Load libraries
library(plyr)
library(tidyverse)
library(readxl)
library(tidylog)
library(RCurl)
library(janitor)
library(patchwork)
library(devtools)
library(viridis)
library(nngeo)
library(conflicted)

# Source code for map plots
source_url("https://raw.githubusercontent.com/maxlindmark/cod-interactions/main/R/functions/map-plot.R")

home <- here::here()

# Because I loaded plyr (tidylog uses dplyr)
conflicts_prefer(tidylog::mutate)
conflicts_prefer(tidylog::summarise)
conflicts_prefer(tidylog::filter)
conflicts_prefer(tidylog::distinct)
conflicts_prefer(tidylog::group_by)
conflicts_prefer(tidylog::drop_na)
conflicts_prefer(tidylog::ungroup)
conflicts_prefer(tidylog::left_join)
conflicts_prefer(tidylog::select)
conflicts_prefer(dplyr::arrange)

# Load coordinate-changing functions
# Function to change coordinates to something more useful
format.position <- function(x){
  sign.x <- sign(x)
  x <- abs(x)
  x <- ifelse(nchar(x)==3, paste("0",x,sep=""), x)
  x <- ifelse(nchar(x)==2, paste("00",x,sep=""), x)
  x <- ifelse(nchar(x)==1, paste("000",x,sep=""), x)
  dec.x <- as.numeric(paste(substring(x,1,2)))+as.numeric(paste(substring(x,3,4)))/60
  dec.x <- sign.x*dec.x
}

Load data

First read the stomach + trawl data. Take the unique haul id because that’s all we need

d <- read_csv(paste0(home, "/data/clean/aggregated_stomach_data.csv")) %>% 
  dplyr::select(lon, lat, X, Y, year, quarter, smhi_serial_no, haul_id, bottom_depth) %>% 
  distinct(haul_id, .keep_all = TRUE) %>% 
  mutate(q_year = paste(quarter, year, sep = "_"),
         smhi_serial_no_all = paste(q_year, smhi_serial_no, sep = "_")) %>% 
  as.data.frame()

Check if there are any NA’s in the one key we have to join the data

d %>% 
  group_by(year, quarter) %>% 
  summarise(sum_na = sum(is.na(smhi_serial_no))) %>% 
  as.data.frame()
   year quarter sum_na
1  2015       4      0
2  2016       1      0
3  2016       4      0
4  2017       1      0
5  2017       4      0
6  2018       1      0
7  2018       4      0
8  2019       4      0
9  2020       1      0
10 2020       4      7
11 2021       1     22
12 2021       4     13
13 2022       1     12
14 2022       4     14

Ok, so in addition to all the peculiarities with the data structure (sometimes haul is given in as a name of a tab, sometimes filename, sometimes cell in a non-R friendly ctd output), and the raw CTD data itself, it doesn’t look very promising that there are many missing keys in recent data.

I will anyway try with an early survey wihout any NA’s in the key.

Start with Q4 2015

# Nov 2015
setwd(paste0(home, "/data/bits-oxygen/15_11/CTD_SBE911/CNV/for_r/"))

filenames <- list.files(pattern = "*.xlsx")
filenames
 [1] "26da.2015.12.1.xlsx"  "26da.2015.12.10.xlsx" "26da.2015.12.11.xlsx"
 [4] "26da.2015.12.13.xlsx" "26da.2015.12.14.xlsx" "26da.2015.12.17.xlsx"
 [7] "26da.2015.12.18.xlsx" "26da.2015.12.20.xlsx" "26da.2015.12.22.xlsx"
[10] "26da.2015.12.24.xlsx" "26da.2015.12.26.xlsx" "26da.2015.12.27.xlsx"
[13] "26da.2015.12.3.xlsx"  "26da.2015.12.30.xlsx" "26da.2015.12.33.xlsx"
[16] "26da.2015.12.34.xlsx" "26da.2015.12.36.xlsx" "26da.2015.12.38.xlsx"
[19] "26da.2015.12.40.xlsx" "26da.2015.12.42.xlsx" "26da.2015.12.44.xlsx"
[22] "26da.2015.12.46.xlsx" "26da.2015.12.48.xlsx" "26da.2015.12.5.xlsx" 
[25] "26da.2015.12.50.xlsx" "26da.2015.12.52.xlsx" "26da.2015.12.54.xlsx"
[28] "26da.2015.12.56.xlsx" "26da.2015.12.58.xlsx" "26da.2015.12.7.xlsx" 
q4_15 <- purrr::map_df(filenames, 
                       ~read_excel(.x, skip = 385, col_types = "guess") %>% 
                         dplyr::select(4, 12, 19, 23, 24) %>% 
                         dplyr::rename(temp = 1,
                                       oxy = 2,
                                       depth = 3, 
                                       lat = 4,
                                       lon = 5) %>%
                         mutate(filename = .x))

q4_15 <- q4_15 %>% 
  mutate(filename = str_remove(filename, ".xlsx")) %>% 
  mutate(smhi_serial_no = as.numeric(sub(".*\\.(.*)", "\\1", filename))) %>% 
  dplyr::select(temp, oxy, smhi_serial_no, lat, lon, depth) %>% 
  dplyr::rename(lat_ctd = lat, 
                lon_ctd = lon)

# Now see how the unique values for the key ("smhi_serial_no") for trawl data and CTD data
d %>%
  filter(year == 2015, quarter == 4) %>%
  distinct(smhi_serial_no) %>%
  arrange(smhi_serial_no) %>% 
  pull()
 [1]  1  3  5  7 10 11 14 17 18 20 30 33 36 48 50 52
q4_15 %>%
  distinct(smhi_serial_no) %>%
  arrange(smhi_serial_no) %>%
  pull()
 [1]  1  3  5  7 10 11 13 14 17 18 20 22 24 26 27 30 33 34 36 38 40 42 44 46 48
[26] 50 52 54 56 58
# Explore the data a bit, because we have CTD data for many depths per trawl station
q4_15 %>% 
  filter(smhi_serial_no %in% c(unique(filter(d, year == 2015, quarter == 4)$smhi_serial_no))) %>% 
  ggplot(aes(oxy, depth, color = as.factor(smhi_serial_no))) +
  scale_y_continuous(trans = "reverse") + 
  scale_color_viridis(discrete = TRUE) +
  geom_point()

q4_15 %>% 
  filter(smhi_serial_no %in% c(unique(filter(d, year == 2015, quarter == 4)$smhi_serial_no))) %>% 
  ggplot(aes(oxy, depth)) +
  scale_y_continuous(trans = "reverse") +
  geom_point(size = 0.5) +
  facet_wrap(~smhi_serial_no)

# Before joining, we need to summarise the oxygen a bit. I *assume* that the deepest point is the bottom. I will average oxygen for the deepest 3 metres. 

q4_15_trim <- q4_15 %>% 
  group_by(smhi_serial_no) %>% 
  mutate(ctd_depth = ifelse(depth > max(depth) -3, "near bottom", "pelagic")) %>% 
  ungroup() %>% 
  filter(ctd_depth == "near bottom") %>% 
  group_by(smhi_serial_no) %>% 
  summarise(oxy = mean(oxy),
                   depth = mean(depth),
                   temp = mean(temp)) %>% 
  ungroup()
  
# Join in oxygen data!
d_q4_15 <- d %>% 
  filter(year == 2015, quarter == 4) %>% 
  left_join(q4_15_trim, by = "smhi_serial_no")

# Check oxygen values
# Seems low? This is from the BITS manual:
# Selected hauls should be omitted in the case when the results of at least two stations in the same depth layers have revealed that fish not appeared in the zone which was covered by the net opening and when hydrographical observations have revealed Manual of the Baltic International Trawl Surveys (BITS) 5 that oxygen content is less that 1.5 ml/l in the layer of vertical net opening

ggplot(d_q4_15, aes(smhi_serial_no, oxy)) + 
  geom_point() + 
  geom_hline(yintercept = 1.5, alpha = 0.5, linetype = 2)

ggplot(d_q4_15, aes(smhi_serial_no, temp)) + 
  geom_point()

# Check also the coordinates match
ctd_q415 <- q4_15_trim
trawl_q415 <- d %>% filter(year == 2015, quarter == 4) 
          
# https://stackoverflow.com/questions/71959927/spatial-join-two-data-frames-by-nearest-feature-and-date-in-r
trawl_sf_q415 <- trawl_q415 %>% st_as_sf(coords = c("lon", "lat"), remove = FALSE) %>%
  st_set_crs(4326) 

ctd_q415 <- q4_15_trim %>%
  # Add back in coord since they were removed after summarising
  left_join(q4_15 %>%
              distinct(smhi_serial_no, .keep_all = TRUE) %>% 
              dplyr::select(lat_ctd, lon_ctd, smhi_serial_no), by = "smhi_serial_no") %>% 
  st_as_sf(coords = c("lon_ctd", "lat_ctd"), remove = FALSE) %>%
  st_set_crs(4326) 

# Plot on map
st_connect(trawl_sf_q415, ctd_q415) %>% 
  mapview::mapview() +
  mapview::mapview(trawl_sf_q415, color = 'tomato', col.regions = 'tomato') + 
  mapview::mapview(ctd_q415, color = 'steelblue', col.regions = 'steelblue')
Calculating nearest IDs

  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |====                                                                  |   6%
  |                                                                            
  |=========                                                             |  12%
  |                                                                            
  |=============                                                         |  19%
  |                                                                            
  |==================                                                    |  25%
  |                                                                            
  |======================                                                |  31%
  |                                                                            
  |==========================                                            |  38%
  |                                                                            
  |===============================                                       |  44%
  |                                                                            
  |===================================                                   |  50%
  |                                                                            
  |=======================================                               |  56%
  |                                                                            
  |============================================                          |  62%
  |                                                                            
  |================================================                      |  69%
  |                                                                            
  |====================================================                  |  75%
  |                                                                            
  |=========================================================             |  81%
  |                                                                            
  |=============================================================         |  88%
  |                                                                            
  |==================================================================    |  94%
  |                                                                            
  |======================================================================| 100%
Calculating lines

Seems to work! Now try a more recent year with NAs where we need to match by coordinates.. Let’s do Q4 2020. This specific survey comes in single file where station is not indicated by tab name, but a string in somewhat consistent position in the excel file. Each tab does have a 3 digit number that could be the serial no, but after looking at the serial numbers in the trawl data I see that it cannot be. They are in the 700-range.

d %>% 
  filter(year == 2020 & quarter == 4) %>% 
  distinct(smhi_serial_no) %>% pull()
 [1] 704 710 730  NA 734 719 735 737 703 715 716

Another critical issue here is that in this specific cell the key is placed within a string. Like this ** Station: 6.5 Faro 0716, where 0716 is the string. This is possible to retrieve though. However, for some tabs, only the station name is given, not the number. Like this: ** Station: 12se Nar. This will result in an NA. This means we now have two sources for NA in the matching columns, and I will therefore try to join by coordinates instead. HOWEVER, the coordinates are not stored in a column, but as a part of random string I need to find in the raw data… Hence the multiple lapply(tab_names_trim, function(x) read_excel(path = xl_data with different “skips”. Finally, not that the coordinates are not in decimal degrees but decimal minutes …

In this code, I read all tabs containing CTD data and put them in a list. Then I loop through all data frames in the list and clean them up.

setwd(home)

xl_data <- paste0(home, "/data/bits-oxygen/20_11/CTD_BITS-06.xlsx")
tab_names <- excel_sheets(path = xl_data)
tab_names_trim <- tab_names[grepl("\\_SBE", tab_names)]

# Put all tabs in a list. Skip different # rows to get either raw data, id or lat/lon
list_vars <- lapply(tab_names_trim, function(x) read_excel(path = xl_data, sheet = x, skip = 12))
list_all_id <- lapply(tab_names_trim, function(x) read_excel(path = xl_data, sheet = x))
list_all_lat <- lapply(tab_names_trim, function(x) read_excel(path = xl_data, sheet = x, skip = 8))
list_all_lon <- lapply(tab_names_trim, function(x) read_excel(path = xl_data, sheet = x, skip = 9))

# list_all contains all relevant tabs. Loop through all these

vars_list <- list()

for(i in 1:length(tab_names_trim)){
  
  # Get the actual CTD data
  vars <- list_vars[[i]] %>% 
    dplyr::select(contains(c("deg C", "[true depth, m]", "[ml/l]"))) %>% 
    dplyr::rename(temperature = 1,
                  depth = 2,
                  oxygen = 3)

  # Now get the ID
  id <- names(list_all_id[[i]])[1]
  id <- strsplit(id, " ")[[1]]
  id <- as.numeric(id[length(id)])
  vars$smhi_serial_no <- id
  
  # Lat and lon for cross-checking
  vars$lat <- names(list_all_lat[[i]])[1]
  vars$lon <- names(list_all_lon[[i]])[1]
  
  # Clean up data!
  #lat.d.d = format.position(lat.dm.m)
  
  vars <- vars %>%
    mutate(temperature = as.numeric(temperature),
           depth = as.numeric(depth),
           oxygen = as.numeric(oxygen),
           smhi_serial_no = as.numeric(smhi_serial_no)) %>% 
    mutate(lat = str_remove(lat, "\\** Latitude: ")) %>% 
    mutate(lat = str_remove(lat, " N")) %>% 
    mutate(lat = str_remove(lat, " ")) %>% 
    mutate(lat = format.position(as.numeric(lat))) %>% 
    mutate(lat = as.numeric(lat)) %>% 
    mutate(lon = str_remove(lon, "\\** Longitude: ")) %>% 
    mutate(lon = str_remove(lon, " E")) %>% 
    mutate(lon = str_remove(lon, " ")) %>% 
    mutate(lon = format.position(as.numeric(lon))) %>% 
    mutate(lon = as.numeric(lon)) %>% 
    dplyr::rename(lat_ctd = lat, lon_ctd = lon, depth_ctd = depth)
  
  vars_list[[i]] <- vars
  
}

q4_20 <- bind_rows(vars_list)

First explore the data

q4_20 %>% 
  filter(smhi_serial_no %in% c(unique(filter(d, year == 2020, quarter == 4)$smhi_serial_no))) %>% 
  ggplot(aes(oxygen, depth_ctd, color = as.factor(smhi_serial_no))) +
  scale_y_continuous(trans = "reverse") + 
  scale_color_viridis(discrete = TRUE) +
  geom_point()

q4_20 %>% 
  filter(smhi_serial_no %in% c(unique(filter(d, year == 2020, quarter == 4)$smhi_serial_no))) %>% 
  ggplot(aes(oxygen, depth_ctd)) +
  scale_y_continuous(trans = "reverse") +
  geom_point(size = 0.5) +
  facet_wrap(~smhi_serial_no)

Again, lots of NA in the smhi_serial_no. Make a new station ID by using coordinates instead.

q4_20 %>% 
  arrange(desc(depth_ctd))
# A tibble: 2,993 × 6
   temperature depth_ctd  oxygen smhi_serial_no lat_ctd lon_ctd
         <dbl>     <dbl>   <dbl>          <dbl>   <dbl>   <dbl>
 1       -2.04     2122.   15.1             713    57.9    19.4
 2       -8.91     1886. -140.              713    57.9    19.4
 3      -13.2      1466. -137.              713    57.9    19.4
 4       -8.90     1140. -126.              713    57.9    19.4
 5       -2.03      886.   12.8             713    57.9    19.4
 6        2.09      689.   10.9             713    57.9    19.4
 7        4.56      536.    9.96            713    57.9    19.4
 8        6.04      417.    9.42            713    57.9    19.4
 9        6.94      324.    9.07            713    57.9    19.4
10        7.47      252.    8.87            713    57.9    19.4
# ℹ 2,983 more rows
q4_20_id <- q4_20 %>% 
  mutate(id = paste(round(lat_ctd, digits = 3), round(lon_ctd, digits = 3), sep = "_"))

q4_20_id %>% 
  filter(smhi_serial_no %in% c(unique(filter(d, year == 2020, quarter == 4)$smhi_serial_no))) %>% 
  ggplot(aes(oxygen, depth_ctd, color = as.factor(id))) +
  scale_y_continuous(trans = "reverse") + 
  scale_color_viridis(discrete = TRUE) +
  geom_point()

q4_20_id %>% 
  filter(smhi_serial_no %in% c(unique(filter(d, year == 2020, quarter == 4)$smhi_serial_no))) %>% 
  ggplot(aes(oxygen, depth_ctd)) +
  scale_y_continuous(trans = "reverse") +
  geom_point(size = 0.5) +
  facet_wrap(~id)

Still an NA but that’s all we can do. Repeat the processes of summarizing oxygen near bottom.

q4_20_trim <- q4_20_id %>% 
  group_by(id) %>% 
  mutate(ctd_depth = ifelse(depth_ctd > max(depth_ctd) -3, "near bottom", "pelagic")) %>% 
  ungroup() %>% 
  filter(ctd_depth == "near bottom") %>% 
  group_by(id) %>% 
  summarise(oxy = mean(oxygen),
                   depth = mean(depth_ctd),
                   temp = mean(temperature)) %>% 
  ungroup()

# Inspect
q4_20_trim %>% as.data.frame()
              id         oxy      depth      temp
1  55.183_13.583  4.37845000   39.65850 12.471833
2     55.25_13.9  6.68965000   36.65450 10.718050
3   55.45_14.567  2.63443333   57.92550 12.271250
4    55.667_14.5  3.19096667   45.80550 10.971283
5  55.683_14.367  5.60985000   39.73350 10.869200
6    55.7_14.417  6.79985000   35.68600 10.869567
7    55.817_15.5  2.99953333   45.28550 10.696700
8      55.9_15.6  3.83463333   47.83317 10.678850
9  56.517_16.833  0.10241667   58.97433  5.675767
10     56.717_17  0.35266667   61.00600  5.593233
11   56.7_17.017  0.12108333   64.54700  5.604883
12   56.967_17.2  0.11041667   68.58183  5.691567
13 56.983_17.917  0.08928333   66.55133  5.874167
14 57.083_18.917  0.08175000   85.23950  6.469133
15 57.167_18.883  4.63080000   50.87600  5.836750
16  57.233_19.05  0.10833333   78.18417  6.066900
17 57.333_19.233  0.07191667   97.88750  6.804233
18     57.3_19.1  0.38640000   73.64500  5.954150
19 57.667_19.383  0.09115000   74.14717  6.149417
20  57.75_19.467  0.08200000   78.69850  6.290733
21 57.867_19.517  0.60720000   68.62767  5.967217
22    57.9_19.45 15.08860000 2121.50400 -2.043100
23   58.033_19.5  0.07616667  100.96650  6.330050
24  58.083_19.45  0.09268333   84.78933  6.235283
25         NA_NA  0.09283333   75.67717  5.843117

Ok, so here we also see that in one of the stations, the depth measure is way off, and this is verified in the excel file as well.

Remove this.

q4_20_trim <- q4_20_trim %>% filter(depth < 300)

Here we would have joined this trimmed CTD data with the trawl data, but because of the lack of key, I will instead have to do it by matching nearest point.

# https://stackoverflow.com/questions/59621797/evaluating-the-closest-distance-from-one-point-between-multiple-options
trawl_sf <- d %>%
  filter(year == 2020 & quarter == 4) %>%
  st_as_sf(coords = c("lon", "lat"), remove = FALSE) %>%
  st_set_crs(4326) 

ctd_sf <- q4_20_trim %>%
  separate(id, sep = "_", into = c("lat", "lon"), convert = TRUE) %>% 
  drop_na(lat) %>%
  drop_na(lon) %>% 
  st_as_sf(coords = c("lon", "lat"), remove = FALSE) %>%
  st_set_crs(4326) 

# Join df with df1, based on the nearest feature:
nrow(trawl_sf)
[1] 17
nrow(ctd_sf)
[1] 23
df_near <- st_join(trawl_sf, ctd_sf, join = st_nearest_feature)

str(df_near)
Classes 'sf' and 'data.frame':  17 obs. of  17 variables:
 $ lon.x             : num  17 19.2 13.6 13.9 13.9 ...
 $ lat.x             : num  57.4 57.6 55.2 55.3 55.2 ...
 $ X                 : num  621 749 411 431 433 ...
 $ Y                 : num  6364 6392 6118 6127 6123 ...
 $ year              : num  2020 2020 2020 2020 2020 2020 2020 2020 2020 2020 ...
 $ quarter           : num  4 4 4 4 4 4 4 4 4 4 ...
 $ smhi_serial_no    : num  704 710 730 NA NA NA 734 719 735 NA ...
 $ haul_id           : chr  "2020_4_213" "2020_4_218" "2020_4_237" "2020_4_238" ...
 $ bottom_depth      : num  552 465 433 371 381 565 464 484 343 381 ...
 $ q_year            : chr  "4_2020" "4_2020" "4_2020" "4_2020" ...
 $ smhi_serial_no_all: chr  "4_2020_704" "4_2020_710" "4_2020_730" "4_2020_NA" ...
 $ lat.y             : num  57 57.7 55.2 55.2 55.2 ...
 $ lon.y             : num  17.2 19.4 13.6 13.9 13.9 ...
 $ oxy               : num  0.1104 0.0911 4.3784 6.6896 6.6896 ...
 $ depth             : num  68.6 74.1 39.7 36.7 36.7 ...
 $ temp              : num  5.69 6.15 12.47 10.72 10.72 ...
 $ geometry          :sfc_POINT of length 17; first list element:  'XY' num  17 57.4
 - attr(*, "sf_column")= chr "geometry"
 - attr(*, "agr")= Factor w/ 3 levels "constant","aggregate",..: NA NA NA NA NA NA NA NA NA NA ...
  ..- attr(*, "names")= chr [1:16] "lon.x" "lat.x" "X" "Y" ...
# Check distance in metres
ctd_sf %>%
  cbind(
    trawl_sf[st_nearest_feature(ctd_sf, trawl_sf),]) %>% 
  mutate(dist = st_distance(geometry, geometry.1, by_element = T)) %>% 
  arrange(desc(dist)) %>% 
  as.data.frame()
        lat    lon        oxy     depth      temp    lon.1    lat.1        X
14   56.517 16.833 0.10241667  58.97433  5.675767 16.90000 57.35000 614.3265
14.2 56.700 17.017 0.12108333  64.54700  5.604883 16.90000 57.35000 614.3265
14.1 56.717 17.000 0.35266667  61.00600  5.593233 16.90000 57.35000 614.3265
8    56.983 17.917 0.08928333  66.55133  5.874167 18.85000 57.16667 732.7606
1    56.967 17.200 0.11041667  68.58183  5.691567 17.01667 57.40000 621.1803
2    57.667 19.383 0.09115000  74.14717  6.149417 19.16667 57.60000 748.9352
17   57.750 19.467 0.08200000  78.69850  6.290733 19.50000 57.86667 766.8602
13.1 57.333 19.233 0.07191667  97.88750  6.804233 19.05000 57.30000 743.9612
8.1  57.083 18.917 0.08175000  85.23950  6.469133 18.85000 57.16667 732.7606
13   57.233 19.050 0.10833333  78.18417  6.066900 19.05000 57.30000 743.9612
6    55.450 14.567 2.63443333  57.92550 12.271250 14.46667 55.45000 466.2665
16   58.033 19.500 0.07616667 100.96650  6.330050 19.43333 58.06667 761.4486
12   55.817 15.500 2.99953333  45.28550 10.696700 15.56667 55.83333 535.4935
11   55.900 15.600 3.83463333  47.83317 10.678850 15.56667 55.86667 535.4631
4    55.250 13.900 6.68965000  36.65450 10.718050 13.91667 55.28333 431.1914
10   55.700 14.417 6.79985000  35.68600 10.869567 14.36667 55.70000 460.1953
13.2 57.300 19.100 0.38640000  73.64500  5.954150 19.05000 57.30000 743.9612
3    55.183 13.583 4.37845000  39.65850 12.471833 13.60000 55.20000 410.8932
16.1 58.083 19.450 0.09268333  84.78933  6.235283 19.43333 58.06667 761.4486
8.2  57.167 18.883 4.63080000  50.87600  5.836750 18.85000 57.16667 732.7606
7    55.667 14.500 3.19096667  45.80550 10.971283 14.50000 55.68333 468.5617
9    55.683 14.367 5.60985000  39.73350 10.869200 14.35000 55.68333 459.1304
17.1 57.867 19.517 0.60720000  68.62767  5.967217 19.50000 57.86667 766.8602
            Y year quarter smhi_serial_no    haul_id bottom_depth q_year
14   6357.944 2020       4            703 2020_4_212          435 4_2020
14.2 6357.944 2020       4            703 2020_4_212          435 4_2020
14.1 6357.944 2020       4            703 2020_4_212          435 4_2020
8    6342.514 2020       4            719 2020_4_227          484 4_2020
1    6363.710 2020       4            704 2020_4_213          552 4_2020
2    6391.826 2020       4            710 2020_4_218          465 4_2020
17   6422.747 2020       4            716 2020_4_224           69 4_2020
13.1 6358.042 2020       4             NA 2020_4_229          677 4_2020
8.1  6342.514 2020       4            719 2020_4_227          484 4_2020
13   6358.042 2020       4             NA 2020_4_229          677 4_2020
6    6144.998 2020       4             NA 2020_4_240          565 4_2020
16   6444.723 2020       4            715 2020_4_223          754 4_2020
12   6187.676 2020       4             NA 2020_4_245          375 4_2020
11   6191.385 2020       4            737 2020_4_244          502 4_2020
4    6126.856 2020       4             NA 2020_4_238          371 4_2020
10   6172.873 2020       4             NA 2020_4_243          381 4_2020
13.2 6358.042 2020       4             NA 2020_4_229          677 4_2020
3    6117.942 2020       4            730 2020_4_237          433 4_2020
16.1 6444.723 2020       4            715 2020_4_223          754 4_2020
8.2  6342.514 2020       4            719 2020_4_227          484 4_2020
7    6170.950 2020       4            734 2020_4_241          464 4_2020
9    6171.028 2020       4            735 2020_4_242          343 4_2020
17.1 6422.747 2020       4            716 2020_4_224           69 4_2020
     smhi_serial_no_all          dist              geometry
14           4_2020_703 92851.403 [m] POINT (16.833 56.517)
14.2         4_2020_703 72732.099 [m]   POINT (17.017 56.7)
14.1         4_2020_703 70752.168 [m]     POINT (17 56.717)
8            4_2020_719 60169.007 [m] POINT (17.917 56.983)
1            4_2020_704 49478.294 [m]   POINT (17.2 56.967)
2            4_2020_710 14922.439 [m] POINT (19.383 57.667)
17           4_2020_716 13140.927 [m]  POINT (19.467 57.75)
13.1          4_2020_NA 11623.025 [m] POINT (19.233 57.333)
8.1          4_2020_719 10162.682 [m] POINT (18.917 57.083)
13            4_2020_NA  7461.431 [m]  POINT (19.05 57.233)
6             4_2020_NA  6348.684 [m]  POINT (14.567 55.45)
16           4_2020_715  5436.753 [m]   POINT (19.5 58.033)
12            4_2020_NA  4556.868 [m]   POINT (15.5 55.817)
11           4_2020_737  4257.394 [m]     POINT (15.6 55.9)
4             4_2020_NA  3859.228 [m]    POINT (13.9 55.25)
10            4_2020_NA  3164.719 [m]   POINT (14.417 55.7)
13.2          4_2020_NA  3014.117 [m]     POINT (19.1 57.3)
3            4_2020_730  2180.378 [m] POINT (13.583 55.183)
16.1         4_2020_715  2068.020 [m]  POINT (19.45 58.083)
8.2          4_2020_719  1996.844 [m] POINT (18.883 57.167)
7            4_2020_734  1818.487 [m]   POINT (14.5 55.667)
9            4_2020_735  1069.982 [m] POINT (14.367 55.683)
17.1         4_2020_716  1009.671 [m] POINT (19.517 57.867)
                    geometry.1
14          POINT (16.9 57.35)
14.2        POINT (16.9 57.35)
14.1        POINT (16.9 57.35)
8       POINT (18.85 57.16667)
1        POINT (17.01667 57.4)
2        POINT (19.16667 57.6)
17       POINT (19.5 57.86667)
13.1        POINT (19.05 57.3)
8.1     POINT (18.85 57.16667)
13          POINT (19.05 57.3)
6       POINT (14.46667 55.45)
16   POINT (19.43333 58.06667)
12   POINT (15.56667 55.83333)
11   POINT (15.56667 55.86667)
4    POINT (13.91667 55.28333)
10       POINT (14.36667 55.7)
13.2        POINT (19.05 57.3)
3            POINT (13.6 55.2)
16.1 POINT (19.43333 58.06667)
8.2     POINT (18.85 57.16667)
7        POINT (14.5 55.68333)
9       POINT (14.35 55.68333)
17.1     POINT (19.5 57.86667)
# Here are the points on a map. Some trawl stations that are far from a CTD reading
st_connect(trawl_sf, ctd_sf) %>% 
  mapview::mapview() +
  mapview::mapview(trawl_sf, color = 'tomato', col.regions = 'tomato') + 
  mapview::mapview(ctd_sf, color = 'steelblue', col.regions = 'steelblue')
Calculating nearest IDs

  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |====                                                                  |   6%
  |                                                                            
  |========                                                              |  12%
  |                                                                            
  |============                                                          |  18%
  |                                                                            
  |================                                                      |  24%
  |                                                                            
  |=====================                                                 |  29%
  |                                                                            
  |=========================                                             |  35%
  |                                                                            
  |=============================                                         |  41%
  |                                                                            
  |=================================                                     |  47%
  |                                                                            
  |=====================================                                 |  53%
  |                                                                            
  |=========================================                             |  59%
  |                                                                            
  |=============================================                         |  65%
  |                                                                            
  |=================================================                     |  71%
  |                                                                            
  |======================================================                |  76%
  |                                                                            
  |==========================================================            |  82%
  |                                                                            
  |==============================================================        |  88%
  |                                                                            
  |==================================================================    |  94%
  |                                                                            
  |======================================================================| 100%
Calculating lines
# Here we can see which values of CTD are matched to trawl data
ggplot() + 
  geom_sf(data = trawl_sf, aes(color = "trawl"), size = 3, alpha = .6) +
  geom_sf(data = ctd_sf, aes(color = "ctd"), alpha = .6) +
  scale_color_manual(values = c("steelblue", "tomato"), name = "Data type") + 
  geom_sf(data = st_connect(trawl_sf, ctd_sf), linewidth = 0.2)
Calculating nearest IDs

  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |====                                                                  |   6%
  |                                                                            
  |========                                                              |  12%
  |                                                                            
  |============                                                          |  18%
  |                                                                            
  |================                                                      |  24%
  |                                                                            
  |=====================                                                 |  29%
  |                                                                            
  |=========================                                             |  35%
  |                                                                            
  |=============================                                         |  41%
  |                                                                            
  |=================================                                     |  47%
  |                                                                            
  |=====================================                                 |  53%
  |                                                                            
  |=========================================                             |  59%
  |                                                                            
  |=============================================                         |  65%
  |                                                                            
  |=================================================                     |  71%
  |                                                                            
  |======================================================                |  76%
  |                                                                            
  |==========================================================            |  82%
  |                                                                            
  |==============================================================        |  88%
  |                                                                            
  |==================================================================    |  94%
  |                                                                            
  |======================================================================| 100%
Calculating lines

Preliminary conclusions:

  1. Not all trawl data have a CTD measurement, we’d need to discard stomach data, and some CTD measurements are nonsens
  2. It is extremely time-consuming to prepare the data, essentially it needs to be done by each survey, and sometimes within each file (station) within a survey
  3. At which depths do we summarise oxygen data? We cannot no for sure it’s the bottom
  4. How far from trawl locaiton is CTD ok to use?

Do it for all years!

Here I will redo everything but with only the essential code. I can do until 18_11 using the approach in the first section.

Note, it’s not 100% the same code, because in some surveys, the row of data starts at a different line, and there are also different amounts of columns…

# Nov 2015
setwd(paste0(home, "/data/bits-oxygen/15_11/CTD_SBE911/CNV/for_r/"))

filenames <- list.files(pattern = "*.xlsx")

q4_15 <- purrr::map_df(filenames, 
                       ~read_excel(.x, skip = 385, col_types = "guess") %>% 
                         dplyr::select(4, 12, 19, 23, 24) %>% 
                         dplyr::rename(temp = 1,
                                       oxy = 2,
                                       depth = 3, 
                                       lat = 4,
                                       lon = 5) %>%
                         mutate(filename = .x)) %>% 
  mutate(filename = str_remove(filename, ".xlsx"),
         q_year = paste(4, 2015, sep = "_"),
         smhi_serial_no = as.numeric(sub(".*\\.(.*)", "\\1", filename)))

# Feb 2016
setwd(paste0(home, "/data/bits-oxygen/16_02/"))

filenames <- list.files(pattern = "*.xlsx")

q1_16 <- purrr::map_df(filenames, 
                       ~read_excel(.x, skip = 368, col_types = "guess") %>% 
                         dplyr::select(4, 12, 19, 23, 24) %>% 
                         dplyr::rename(temp = 1,
                                       oxy = 2,
                                       depth = 3, 
                                       lat = 4,
                                       lon = 5) %>%
                         mutate(filename = .x)) %>% 
  mutate(filename = str_remove(filename, ".xlsx"),
         q_year = paste(1, 2016, sep = "_"), 
         smhi_serial_no = as.numeric(sub(".*\\.(.*)", "\\1", filename)))

# Nov 2016
setwd(paste0(home, "/data/bits-oxygen/16_11/"))

filenames <- list.files(pattern = "*.xlsx")

q4_16 <- purrr::map_df(filenames, 
                       ~read_excel(.x, skip = 368, col_types = "guess") %>% 
                         dplyr::select(4, 12, 19, 23, 24) %>% 
                         dplyr::rename(temp = 1,
                                       oxy = 2,
                                       depth = 3, 
                                       lat = 4,
                                       lon = 5) %>%
                         mutate(filename = .x)) %>% 
  mutate(filename = str_remove(filename, ".xlsx"),
         q_year = paste(4, 2016, sep = "_"), 
         smhi_serial_no = as.numeric(sub(".*\\.(.*)", "\\1", filename)))

# Feb 2017
setwd(paste0(home, "/data/bits-oxygen/17_02/"))

filenames <- list.files(pattern = "*.xlsx")

q1_17 <- purrr::map_df(filenames, 
                       ~read_excel(.x, skip = 385, col_types = "guess") %>% 
                         dplyr::select(4, 12, 19, 23, 24) %>% 
                         dplyr::rename(temp = 1,
                                       oxy = 2,
                                       depth = 3, 
                                       lat = 4,
                                       lon = 5) %>%
                         mutate(filename = .x)) %>% 
  mutate(filename = str_remove(filename, ".xlsx"),
         q_year = paste(1, 2017, sep = "_"),
         smhi_serial_no = as.numeric(sub(".*\\.(.*)", "\\1", filename)))

# Nov 2017
setwd(paste0(home, "/data/bits-oxygen/17_11/"))

filenames <- list.files(pattern = "*.xlsx")

q4_17 <- purrr::map_df(filenames, 
                       ~read_excel(.x, skip = 375, col_types = "guess") %>% 
                         dplyr::select(4, 12, 19, 22, 23) %>% # Note: different indicies here
                         dplyr::rename(temp = 1,
                                       oxy = 2,
                                       depth = 3, 
                                       lat = 4,
                                       lon = 5) %>%
                         mutate(filename = .x) %>% 
                         mutate(temp = as.numeric(temp),
                                oxy = as.numeric(oxy),
                                lat = as.numeric(lat),
                                lon = as.numeric(lon))) %>% 
  mutate(filename = str_remove(filename, ".xlsx"),
         q_year = paste(4, 2017, sep = "_"),
         smhi_serial_no = as.numeric(sub(".*\\.(.*)", "\\1", filename)))


# Feb 2018
setwd(paste0(home, "/data/bits-oxygen/18_02/"))

filenames <- list.files(pattern = "*.xlsx")

q1_18 <- purrr::map_df(filenames, 
                       ~read_excel(.x, skip = 375, col_types = "guess") %>% 
                         dplyr::select(4, 12, 19, 22, 23) %>% 
                         dplyr::rename(temp = 1,
                                       oxy = 2,
                                       depth = 3, 
                                       lat = 4,
                                       lon = 5) %>%
                         mutate(filename = .x) %>% 
                         mutate(temp = as.numeric(temp),
                                oxy = as.numeric(oxy),
                                lat = as.numeric(lat),
                                lon = as.numeric(lon))) %>% 
  mutate(filename = str_remove(filename, ".xlsx"),
         q_year = paste(1, 2018, sep = "_"),
         smhi_serial_no = as.numeric(sub(".*\\.(.*)", "\\1", filename)))

# Nov 2018
setwd(paste0(home, "/data/bits-oxygen/18_11/"))

filenames <- list.files(pattern = "*.xlsx")

q4_18 <- purrr::map_df(filenames, 
                       ~read_excel(.x, skip = 377, col_types = "guess") %>% 
                         dplyr::select(4, 12, 19, 23, 24) %>% 
                         dplyr::rename(temp = 1,
                                       oxy = 2,
                                       depth = 3, 
                                       lat = 4,
                                       lon = 5) %>%
                         mutate(filename = .x) %>% 
                         mutate(temp = as.numeric(temp),
                                oxy = as.numeric(oxy),
                                depth = as.numeric(depth),
                                lat = as.numeric(lat),
                                lon = as.numeric(lon))) %>% 
  mutate(filename = str_remove(filename, ".xlsx"),
         smhi_serial_no = as.numeric(sub(".*\\.(.*)", "\\1", filename)),
         q_year = paste(4, 2018, sep = "_"))

setwd(home)

# Combine all these data
oxy_old_ctd <- bind_rows(q4_15,
                         q1_16, q4_16,
                         q1_17, q4_17,
                         q1_18, q4_18) %>% 
  drop_na(oxy)

# Quick inspect
ggplot(oxy_old_ctd, aes(oxy)) + 
  geom_histogram() + 
  facet_wrap(~ q_year)

ggplot(oxy_old_ctd, aes(lat)) + 
  facet_wrap(~q_year, scales = "free") + 
  geom_histogram()

ggplot(oxy_old_ctd %>% filter(q_year == "4_2018"), aes(lat)) + 
  facet_wrap(~filename, scales = "free") + 
  geom_histogram()

Some coordinates are off, but they are also in the raw data

Now more on to the more recent data. 2019 Q4 and 2020 Q1 will have to be done solo because they are single excel file with tabs for each station. It does seem to have station number as tab names so I don’t need to match by coordinates.

Here I don’t have a depth column, only pressue and what I guess is bottom depth. I will keep pressure. We need this to calculate the average oxygen near sea-bottom. Before I said 3 metres above deepest, but I might just go with the highest pressure here for this survey.

setwd(home)

d %>%
  filter(year == 2019, quarter == 4) %>%
  distinct(smhi_serial_no) %>%
  arrange(smhi_serial_no) %>% 
  pull()
 [1] 64 65 66 67 68 69 70 72 73 74 78 79 80 82 83 86 87 88 89 90 91 92 93
d %>%
  filter(year == 2020, quarter == 1) %>%
  distinct(smhi_serial_no) %>%
  arrange(smhi_serial_no) %>% 
  pull()
 [1]  99 100 101 102 103 104 105 106 107 109 111 112 114 117 118 120 121 122 123
[20] 125 126 127 128 129 130 131 132 133 134 135 136 142 143
# OK, yes, tabs are station numbers (verify in spreadsheet). Read tabs as elements in a list, then loop through

# 2019 Q4
xl_data <- paste0(home, "/data/bits-oxygen/19_11/Bits2019 Q4 redigerad.xlsx")
tab_names <- excel_sheets(path = xl_data)
# Select only tabs with CTD data
tab_names_trim <- tab_names[grepl("CTD", tab_names)]

# Put all tabs in a list. Skip different # rows to get either raw data, id or lat/lon
list_vars_q419 <- lapply(tab_names_trim, function(x) read_excel(path = xl_data, sheet = x, skip = 34))

str(list_vars_q419[[1]])
tibble [391 × 19] (S3: tbl_df/tbl/data.frame)
 $ Date / Time                          : chr [1:391] "20/11/2019 06:58:26" "20/11/2019 07:03:46" "20/11/2019 07:03:47" "20/11/2019 07:03:49" ...
 $ TURBIDITY;FTU                        : num [1:391] 13.38 2.38 2.38 2.44 2.38 ...
 $ DISSOLVED OXYGEN;SAT%                : num [1:391] 85.6 83.3 83 82.6 82.6 ...
 $ PRESSURE;DBAR                        : num [1:391] 0.249 0.403 0.395 0.191 0.203 0.415 0.389 0.405 0.383 0.184 ...
 $ TEMPERATURE;C                        : num [1:391] 9.64 9.67 9.67 9.67 9.66 ...
 $ CONDUCTIVITY;MS/CM                   : num [1:391] 9.57 9.56 9.56 9.55 9.55 ...
 $ PH;PH                                : num [1:391] -3.15 -3.15 -3.15 -3.15 -3.15 ...
 $ REDOX;MV                             : num [1:391] 413 414 415 417 417 ...
 $ Calc, SALINITY; PSU                  : num [1:391] 7.82 7.81 7.81 7.8 7.8 ...
 $ Calc, DENSITY ANOMALY; KG/M3 [EOS-80]: num [1:391] 1006 1006 1006 1006 1006 ...
 $ Calc, SOS; M/SEC                     : num [1:391] 1456 1456 1456 1456 1456 ...
 $ ml/L                                 : num [1:391] 6.48 6.3 6.28 6.25 6.25 ...
 $ ...13                                : logi [1:391] NA NA NA NA NA NA ...
 $ 1463.1255303030291                   : logi [1:391] NA NA NA NA NA NA ...
 $ 10.745775757575752                   : logi [1:391] NA NA NA NA NA NA ...
 $ 12.625266666666668                   : logi [1:391] NA NA NA NA NA NA ...
 $ 10.18184242424242                    : logi [1:391] NA NA NA NA NA NA ...
 $ 38.911872727272758                   : logi [1:391] NA NA NA NA NA NA ...
 $ 5.0589626909370216                   : logi [1:391] NA NA NA NA NA NA ...
# list_all contains all relevant tabs. Loop through all these

vars_list_q419 <- list()

for(i in 1:length(tab_names_trim)){
  
  # Get the actual CTD data
  vars <- list_vars_q419[[i]] %>% 
    dplyr::rename(temp = `TEMPERATURE;C`,
                  oxy = `ml/L`,
                  pressue_bar = `PRESSURE;DBAR`) %>% 
    dplyr::select(temp, oxy, pressue_bar) %>% 
    mutate(q_year = paste(4, 2019, sep = "_"),
                  smhi_serial_no = tab_names_trim[i],
                  smhi_serial_no = as.numeric(str_remove(smhi_serial_no, "CTD")))

  vars_list_q419[[i]] <- vars
  
}

q4_19 <- bind_rows(vars_list_q419)


# 2020 Q1
xl_data <- paste0(home, "/data/bits-oxygen/20_02/Bits 2020 Q1.xlsx")
tab_names <- excel_sheets(path = xl_data)
# Select only tabs with CTD data
tab_names_trim <- tab_names[grepl("CTD", tab_names)]

# Put all tabs in a list. Skip different # rows to get either raw data, id or lat/lon
list_vars_q120 <- lapply(tab_names_trim, function(x) read_excel(path = xl_data, sheet = x, skip = 34))

str(list_vars_q120[[1]])
tibble [215 × 19] (S3: tbl_df/tbl/data.frame)
 $ Date / Time                          : chr [1:215] "26/02/2020 05:48:58" "26/02/2020 05:49:00" "26/02/2020 05:49:01" "26/02/2020 05:49:02" ...
 $ TURBIDITY;FTU                        : num [1:215] 2.5 2.5 2.38 2.44 2.38 ...
 $ DISSOLVED OXYGEN;SAT%                : num [1:215] 72.6 72.8 72.8 72.9 72.9 ...
 $ PRESSURE;DBAR                        : num [1:215] 0.255 0.426 0.614 0.821 1.016 ...
 $ TEMPERATURE;C                        : num [1:215] 5.45 5.46 5.46 5.45 5.46 ...
 $ CONDUCTIVITY;MS/CM                   : num [1:215] 9.25 9.25 9.26 9.26 9.26 ...
 $ PH;PH                                : num [1:215] -3.15 -3.15 -3.15 -3.15 -3.15 ...
 $ REDOX;MV                             : num [1:215] 326 343 348 355 360 ...
 $ Calc, SALINITY; PSU                  : num [1:215] 8.51 8.51 8.51 8.51 8.51 ...
 $ Calc, DENSITY ANOMALY; KG/M3 [EOS-80]: num [1:215] 1007 1007 1007 1007 1007 ...
 $ Calc, SOS; M/SEC                     : num [1:215] 1439 1439 1439 1439 1439 ...
 $ ml/L                                 : num [1:215] 6.06 6.07 6.08 6.08 6.09 ...
 $ ...13                                : logi [1:215] NA NA NA NA NA NA ...
 $ 1443.4471935483862                   : logi [1:215] NA NA NA NA NA NA ...
 $ 6.1277580645161311                   : logi [1:215] NA NA NA NA NA NA ...
 $ 10.384677419354841                   : logi [1:215] NA NA NA NA NA NA ...
 $ 9.4149139784946225                   : logi [1:215] NA NA NA NA NA NA ...
 $ 24.387274193548393                   : logi [1:215] NA NA NA NA NA NA ...
 $ 5.4491714947240055                   : logi [1:215] NA NA NA NA NA NA ...
# list_all contains all relevant tabs. Loop through all these
vars_list_q120 <- list()

for(i in 1:length(tab_names_trim)){
  
  # Get the actual CTD data
  vars <- list_vars_q120[[i]] %>% 
    dplyr::rename(temp = `TEMPERATURE;C`,
                  oxy = `ml/L`,
                  pressue_bar = `PRESSURE;DBAR`) %>% 
    dplyr::select(temp, oxy, pressue_bar) %>% 
    mutate(q_year = paste(1, 2020, sep = "_"),
           smhi_serial_no = tab_names_trim[i],
           smhi_serial_no = as.numeric(str_remove(smhi_serial_no, "CTD")))


  vars_list_q120[[i]] <- vars
  
}

q1_20 <- bind_rows(vars_list_q120)

unique(q1_20$smhi_serial_no)
 [1]  99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117
[20] 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136
[39] 137 138 139 140 141 143

Ok, now we are entering the times when we have to join by coordinates. It’s also the type of files where this is not stored as a column… First I need to do Q4 2020 again, because unlike the rest of the data that comes after, they are in a single file with tabs, the others have 1 file per station

setwd(home)

xl_data <- paste0(home, "/data/bits-oxygen/20_11/CTD_BITS-06.xlsx")
tab_names <- excel_sheets(path = xl_data)
tab_names_trim <- tab_names[grepl("\\_SBE", tab_names)]

# Put all tabs in a list. Skip different # rows to get either raw data, id or lat/lon
list_vars <- lapply(tab_names_trim, function(x) read_excel(path = xl_data, sheet = x, skip = 12))
list_all_id <- lapply(tab_names_trim, function(x) read_excel(path = xl_data, sheet = x))
list_all_lat <- lapply(tab_names_trim, function(x) read_excel(path = xl_data, sheet = x, skip = 8))
list_all_lon <- lapply(tab_names_trim, function(x) read_excel(path = xl_data, sheet = x, skip = 9))

# list_all contains all relevant tabs. Loop through all these

vars_list <- list()

for(i in 1:length(tab_names_trim)){
  
  # Get the actual CTD data
  vars <- list_vars[[i]] %>% 
    dplyr::select(contains(c("deg C", "[true depth, m]", "[ml/l]"))) %>% 
    dplyr::rename(temp = 1,
                  depth = 2,
                  oxy = 3)

  # Now get the ID
  id <- names(list_all_id[[i]])[1]
  id <- strsplit(id, " ")[[1]]
  id <- as.numeric(id[length(id)])
  vars$smhi_serial_no <- id
  
  # Lat and lon for cross-checking
  vars$lat <- names(list_all_lat[[i]])[1]
  vars$lon <- names(list_all_lon[[i]])[1]
  
  # Clean up data!
  #lat.d.d = format.position(lat.dm.m)
  
  vars <- vars %>%
    mutate(temp = as.numeric(temp),
           depth = as.numeric(depth),
           oxy = as.numeric(oxy),
           smhi_serial_no = as.numeric(smhi_serial_no)) %>% 
    mutate(lat = str_remove(lat, "\\** Latitude: ")) %>% 
    mutate(lat = str_remove(lat, " N")) %>% 
    mutate(lat = str_remove(lat, " ")) %>% 
    mutate(lat = format.position(as.numeric(lat))) %>% 
    mutate(lat = as.numeric(lat)) %>% 
    mutate(lon = str_remove(lon, "\\** Longitude: ")) %>% 
    mutate(lon = str_remove(lon, " E")) %>% 
    mutate(lon = str_remove(lon, " ")) %>% 
    mutate(lon = format.position(as.numeric(lon))) %>% 
    mutate(lon = as.numeric(lon))
  
  vars_list[[i]] <- vars
  
}

q4_20 <- bind_rows(vars_list) %>% mutate(q_year = paste(4, 2020, sep = "_"))

Now do the rest of the recent data. This is q1 2021, q4 2021, q1 2022 and q4 2022

For quarter 1, 2021, the actual data doesn’t start at the same row number… need to split them in two folders. Therefore I do this one separately

# Q1 2021
setwd(paste0(home, "/data/bits-oxygen/21_02/208/"))

filenames <- list.files(pattern = "*.xlsx")

q1_21a <- purrr::map_df(filenames, 
                       ~read_excel(.x, skip = 207, col_types = "guess") %>% 
                         dplyr::select(3, 10, 12) %>% 
                         dplyr::rename(temp = 1,
                                       depth = 2, 
                                       oxy = 3) %>%
                         mutate(filename = .x)) %>% 
  mutate(filename = str_remove(filename, ".xlsx"),
         q_year = paste(1, 2021, sep = "_"))

q1_21a
# A tibble: 5,714 × 5
   temp   depth oxy    filename                            q_year
   <chr>  <chr> <chr>  <chr>                               <chr> 
 1 2.8468 2.025 9.2294 SBE19_6164_20210222_0828_77_10_0204 1_2021
 2 2.8456 2.531 9.2259 SBE19_6164_20210222_0828_77_10_0204 1_2021
 3 2.8471 3.038 9.2283 SBE19_6164_20210222_0828_77_10_0204 1_2021
 4 2.8480 3.544 9.2258 SBE19_6164_20210222_0828_77_10_0204 1_2021
 5 2.8482 4.050 9.2141 SBE19_6164_20210222_0828_77_10_0204 1_2021
 6 2.8471 4.556 9.2322 SBE19_6164_20210222_0828_77_10_0204 1_2021
 7 2.8474 5.063 9.2360 SBE19_6164_20210222_0828_77_10_0204 1_2021
 8 2.8494 5.569 9.2330 SBE19_6164_20210222_0828_77_10_0204 1_2021
 9 2.8511 6.075 9.2318 SBE19_6164_20210222_0828_77_10_0204 1_2021
10 2.8555 6.581 9.2266 SBE19_6164_20210222_0828_77_10_0204 1_2021
# ℹ 5,704 more rows
# Now I need to find the coordinates, and match them by the filename.
q1_21a_lat <- purrr::map_df(filenames, 
                            ~read_excel(.x, skip = 20, col_types = "guess") %>% 
                              mutate(filename = .x)) %>% 
  dplyr::select(c(`Cruise:BITS`, "...3", "...4", "filename")) %>% 
  filter(`Cruise:BITS` %in% c("Latitude:", "Longitude:")) %>% 
  dplyr::rename(E_N = 1) %>% 
  mutate(E_N = ifelse(E_N == "Latitude:", "lat", "lon"),
                coords = paste(...3, ...4, sep = "")) %>% 
  dplyr::select(E_N, coords, filename) %>% 
  filter(E_N == "lat") %>% 
  dplyr::rename(lat_minute = coords) %>% 
  dplyr::select(-E_N) %>% 
  mutate(lat = format.position(as.numeric(lat_minute)),
                filename = str_remove(filename, ".xlsx"))

q1_21a_lat
# A tibble: 41 × 3
   lat_minute filename                              lat
   <chr>      <chr>                               <dbl>
 1 5537.46    SBE19_6164_20210222_0828_77_10_0204  55.6
 2 5541.45    SBE19_6164_20210222_1028_77_10_0205  55.7
 3 5540.96    SBE19_6164_20210222_1609_77_10_0207  55.7
 4 5541.48    SBE19_6164_20210223_0536_77_10_0208  55.7
 5 5548.500   SBE19_6164_20210223_1354_77_10_0209  55.8
 6 5549.38    SBE19_6164_20210223_1559_77_10_0210  55.8
 7 5555.77    SBE19_6164_20210224_0500_77_10_0211  55.9
 8 5607.94    SBE19_6164_20210224_0720_77_10_0212  56.1
 9 5612.67    SBE19_6164_20210224_0922_77_10_0213  56.2
10 5618.93    SBE19_6164_20210224_1141_77_10_0214  56.3
# ℹ 31 more rows
q1_21a_lon <- purrr::map_df(filenames, 
                            ~read_excel(.x, skip = 20, col_types = "guess") %>% 
                              mutate(filename = .x)) %>% 
  dplyr::select(c(`Cruise:BITS`, "...3", "...4", "filename")) %>% 
  filter(`Cruise:BITS` %in% c("Latitude:", "Longitude:")) %>% 
  dplyr::rename(E_N = 1) %>% 
  mutate(E_N = ifelse(E_N == "Latitude:", "lat", "lon"),
                coords = paste(...3, ...4, sep = "")) %>% 
  dplyr::select(E_N, coords, filename) %>% 
  filter(E_N == "lon") %>% 
  dplyr::rename(lon_minute = coords) %>% 
  dplyr::select(-E_N) %>% 
  mutate(lon = format.position(as.numeric(lon_minute)),
                filename = str_remove(filename, ".xlsx"))

q1_21a_lon
# A tibble: 41 × 3
   lon_minute filename                              lon
   <chr>      <chr>                               <dbl>
 1 1440.76    SBE19_6164_20210222_0828_77_10_0204  14.7
 2 14453.23   SBE19_6164_20210222_1028_77_10_0205  14.8
 3 1423.67    SBE19_6164_20210222_1609_77_10_0207  14.4
 4 1422.64    SBE19_6164_20210223_0536_77_10_0208  14.4
 5 1553.94    SBE19_6164_20210223_1354_77_10_0209  15.9
 6 1558.77    SBE19_6164_20210223_1559_77_10_0210  16.0
 7 1837.55    SBE19_6164_20210224_0500_77_10_0211  18.6
 8 1820.57    SBE19_6164_20210224_0720_77_10_0212  18.3
 9 1827.74    SBE19_6164_20210224_0922_77_10_0213  18.4
10 1825.56    SBE19_6164_20210224_1141_77_10_0214  18.4
# ℹ 31 more rows
coords_a <- q1_21a_lat %>% left_join(q1_21a_lon, by = "filename") %>% dplyr::select(filename, lat, lon)
coords_a
# A tibble: 41 × 3
   filename                              lat   lon
   <chr>                               <dbl> <dbl>
 1 SBE19_6164_20210222_0828_77_10_0204  55.6  14.7
 2 SBE19_6164_20210222_1028_77_10_0205  55.7  14.8
 3 SBE19_6164_20210222_1609_77_10_0207  55.7  14.4
 4 SBE19_6164_20210223_0536_77_10_0208  55.7  14.4
 5 SBE19_6164_20210223_1354_77_10_0209  55.8  15.9
 6 SBE19_6164_20210223_1559_77_10_0210  55.8  16.0
 7 SBE19_6164_20210224_0500_77_10_0211  55.9  18.6
 8 SBE19_6164_20210224_0720_77_10_0212  56.1  18.3
 9 SBE19_6164_20210224_0922_77_10_0213  56.2  18.4
10 SBE19_6164_20210224_1141_77_10_0214  56.3  18.4
# ℹ 31 more rows
q1_21a <- q1_21a %>% left_join(coords_a, by = "filename")

q1_21a
# A tibble: 5,714 × 7
   temp   depth oxy    filename                            q_year   lat   lon
   <chr>  <chr> <chr>  <chr>                               <chr>  <dbl> <dbl>
 1 2.8468 2.025 9.2294 SBE19_6164_20210222_0828_77_10_0204 1_2021  55.6  14.7
 2 2.8456 2.531 9.2259 SBE19_6164_20210222_0828_77_10_0204 1_2021  55.6  14.7
 3 2.8471 3.038 9.2283 SBE19_6164_20210222_0828_77_10_0204 1_2021  55.6  14.7
 4 2.8480 3.544 9.2258 SBE19_6164_20210222_0828_77_10_0204 1_2021  55.6  14.7
 5 2.8482 4.050 9.2141 SBE19_6164_20210222_0828_77_10_0204 1_2021  55.6  14.7
 6 2.8471 4.556 9.2322 SBE19_6164_20210222_0828_77_10_0204 1_2021  55.6  14.7
 7 2.8474 5.063 9.2360 SBE19_6164_20210222_0828_77_10_0204 1_2021  55.6  14.7
 8 2.8494 5.569 9.2330 SBE19_6164_20210222_0828_77_10_0204 1_2021  55.6  14.7
 9 2.8511 6.075 9.2318 SBE19_6164_20210222_0828_77_10_0204 1_2021  55.6  14.7
10 2.8555 6.581 9.2266 SBE19_6164_20210222_0828_77_10_0204 1_2021  55.6  14.7
# ℹ 5,704 more rows
# Now do the other type of file

setwd(paste0(home, "/data/bits-oxygen/21_02/387/"))

filenames <- list.files(pattern = "*.xlsx")

q1_21b <- purrr::map_df(filenames, 
                       ~read_excel(.x, skip = 387, col_types = "guess") %>% 
                         dplyr::select(3, 10, 12) %>% 
                         dplyr::rename(temp = 1,
                                       depth = 2, 
                                       oxy = 3) %>%
                         mutate(filename = .x)) %>% 
  mutate(filename = str_remove(filename, ".xlsx"),
         q_year = paste(1, 2021, sep = "_"))

q1_21b
# A tibble: 800 × 5
   temp   depth oxy    filename                            q_year
   <chr>  <chr> <chr>  <chr>                               <chr> 
 1 3.1052 5.061 9.0929 SBE19_6164_20210220_0800_77_10_0194 1_2021
 2 3.1009 5.568 9.0868 SBE19_6164_20210220_0800_77_10_0194 1_2021
 3 3.0982 6.074 9.0867 SBE19_6164_20210220_0800_77_10_0194 1_2021
 4 3.0975 6.580 9.0763 SBE19_6164_20210220_0800_77_10_0194 1_2021
 5 3.0973 7.086 9.0834 SBE19_6164_20210220_0800_77_10_0194 1_2021
 6 3.0971 7.592 9.0807 SBE19_6164_20210220_0800_77_10_0194 1_2021
 7 3.0969 8.098 9.0840 SBE19_6164_20210220_0800_77_10_0194 1_2021
 8 3.0965 8.604 9.0761 SBE19_6164_20210220_0800_77_10_0194 1_2021
 9 3.0963 9.111 9.0833 SBE19_6164_20210220_0800_77_10_0194 1_2021
10 3.0963 9.617 9.0859 SBE19_6164_20210220_0800_77_10_0194 1_2021
# ℹ 790 more rows
# Now I need to find the coordinates, and match them by the filename.
q1_21b_lat <- purrr::map_df(filenames, 
                            ~read_excel(.x, skip = 197, col_types = "guess") %>% 
                              mutate(filename = .x)) %>% 
  dplyr::select(c(`Cruise:BITS`, "...3", "...4", "filename")) %>% 
  filter(`Cruise:BITS` %in% c("Latitude:", "Longitude:")) %>% 
  dplyr::rename(E_N = 1) %>% 
  mutate(E_N = ifelse(E_N == "Latitude:", "lat", "lon"),
                coords = paste(...3, ...4, sep = "")) %>% 
  dplyr::select(E_N, coords, filename) %>% 
  filter(E_N == "lat") %>% 
  dplyr::rename(lat_minute = coords) %>% 
  dplyr::select(-E_N) %>% 
  mutate(lat = format.position(as.numeric(lat_minute)),
                filename = str_remove(filename, ".xlsx"))

q1_21b_lat
# A tibble: 9 × 3
  lat_minute filename                              lat
  <chr>      <chr>                               <dbl>
1 5551.91    SBE19_6164_20210220_0800_77_10_0194  55.8
2 5547.27    SBE19_6164_20210220_1013_77_10_0195  55.8
3 5550.14    SBE19_6164_20210220_1409_77_10_0196  55.8
4 5546.30    SBE19_6164_20210220_1632_77_10_0197  55.8
5 5510.37    SBE19_6164_20210221_0530_77_10_0198  55.2
6 5512.71    SBE19_6164_20210221_0953_77_10_0199  55.2
7 5515.32    SBE19_6164_20210221_1209_77_10_0200  55.2
8 5513.78    SBE19_6164_20210221_1638_77_10_0202  55.2
9 5527.71    SBE19_6164_20210222_0531_77_10_0203  55.4
q1_21b_lon <- purrr::map_df(filenames, 
                            ~read_excel(.x, skip = 197, col_types = "guess") %>% 
                              mutate(filename = .x)) %>% 
  dplyr::select(c(`Cruise:BITS`, "...3", "...4", "filename")) %>% 
  filter(`Cruise:BITS` %in% c("Latitude:", "Longitude:")) %>% 
  dplyr::rename(E_N = 1) %>% 
  mutate(E_N = ifelse(E_N == "Latitude:", "lat", "lon"),
                coords = paste(...3, ...4, sep = "")) %>% 
  dplyr::select(E_N, coords, filename) %>% 
  filter(E_N == "lon") %>% 
  dplyr::rename(lon_minute = coords) %>% 
  dplyr::select(-E_N) %>% 
  mutate(lon = format.position(as.numeric(lon_minute)),
                filename = str_remove(filename, ".xlsx"))

q1_21b_lon
# A tibble: 9 × 3
  lon_minute filename                              lon
  <chr>      <chr>                               <dbl>
1 1603.30    SBE19_6164_20210220_0800_77_10_0194  16.0
2 1558.36    SBE19_6164_20210220_1013_77_10_0195  16.0
3 1531.23    SBE19_6164_20210220_1409_77_10_0196  15.5
4 1523.66    SBE19_6164_20210220_1632_77_10_0197  15.4
5 1320.23    SBE19_6164_20210221_0530_77_10_0198  13.3
6 1335.50    SBE19_6164_20210221_0953_77_10_0199  13.6
7 1340.10    SBE19_6164_20210221_1209_77_10_0200  13.7
8 1355.96    SBE19_6164_20210221_1638_77_10_0202  13.9
9 1433.13    SBE19_6164_20210222_0531_77_10_0203  14.6
coords_b <- q1_21b_lat %>% left_join(q1_21b_lon, by = "filename") %>% dplyr::select(filename, lat, lon)
coords_b
# A tibble: 9 × 3
  filename                              lat   lon
  <chr>                               <dbl> <dbl>
1 SBE19_6164_20210220_0800_77_10_0194  55.8  16.0
2 SBE19_6164_20210220_1013_77_10_0195  55.8  16.0
3 SBE19_6164_20210220_1409_77_10_0196  55.8  15.5
4 SBE19_6164_20210220_1632_77_10_0197  55.8  15.4
5 SBE19_6164_20210221_0530_77_10_0198  55.2  13.3
6 SBE19_6164_20210221_0953_77_10_0199  55.2  13.6
7 SBE19_6164_20210221_1209_77_10_0200  55.2  13.7
8 SBE19_6164_20210221_1638_77_10_0202  55.2  13.9
9 SBE19_6164_20210222_0531_77_10_0203  55.4  14.6
q1_21b <- q1_21b %>% left_join(coords_b, by = "filename")

q1_21b
# A tibble: 800 × 7
   temp   depth oxy    filename                            q_year   lat   lon
   <chr>  <chr> <chr>  <chr>                               <chr>  <dbl> <dbl>
 1 3.1052 5.061 9.0929 SBE19_6164_20210220_0800_77_10_0194 1_2021  55.8  16.0
 2 3.1009 5.568 9.0868 SBE19_6164_20210220_0800_77_10_0194 1_2021  55.8  16.0
 3 3.0982 6.074 9.0867 SBE19_6164_20210220_0800_77_10_0194 1_2021  55.8  16.0
 4 3.0975 6.580 9.0763 SBE19_6164_20210220_0800_77_10_0194 1_2021  55.8  16.0
 5 3.0973 7.086 9.0834 SBE19_6164_20210220_0800_77_10_0194 1_2021  55.8  16.0
 6 3.0971 7.592 9.0807 SBE19_6164_20210220_0800_77_10_0194 1_2021  55.8  16.0
 7 3.0969 8.098 9.0840 SBE19_6164_20210220_0800_77_10_0194 1_2021  55.8  16.0
 8 3.0965 8.604 9.0761 SBE19_6164_20210220_0800_77_10_0194 1_2021  55.8  16.0
 9 3.0963 9.111 9.0833 SBE19_6164_20210220_0800_77_10_0194 1_2021  55.8  16.0
10 3.0963 9.617 9.0859 SBE19_6164_20210220_0800_77_10_0194 1_2021  55.8  16.0
# ℹ 790 more rows
# Combine!
q1_21 <- bind_rows(q1_21a, q1_21b) %>% 
  mutate(temp = as.numeric(temp),
         depth = as.numeric(depth),
         oxy = as.numeric(oxy),
         lat = as.numeric(as.character(lat)),
         lon = as.numeric(as.character(lon)))

Continuing with q4 2021… Here corrdinates are reported differently (as in different positions in the string…). And latitude is even misspelled!?!?

# Q4 2021
setwd(paste0(home, "/data/bits-oxygen/21_11/"))

filenames <- list.files(pattern = "*.xlsx")

q4_21 <- purrr::map_df(filenames, 
                       ~read_excel(.x, skip = 208, col_types = "guess") %>% 
                         dplyr::select(3, 10, 12) %>% 
                         dplyr::rename(temp = 1,
                                       depth = 2, 
                                       oxy = 3) %>%
                         mutate(filename = .x,
                                temp = as.numeric(temp),
                                depth = as.numeric(depth),
                                oxy = as.numeric(oxy))) %>% 
  mutate(filename = str_remove(filename, ".xlsx"),
         q_year = paste(4, 2021, sep = "_"))

# Now I need to find the coordinates, and match them by the filename.
q4_21_lat <- purrr::map_df(filenames, 
                           ~read_excel(.x, skip = 20, col_types = "guess") %>% 
                             mutate(filename = .x)) %>% 
  dplyr::select(c(`Cruise:BITSQ42021`, "...6", "...7", "filename")) %>% 
  filter(`Cruise:BITSQ42021` %in% c("Lattitude", "Longitude")) %>% 
  dplyr::rename(E_N = 1) %>% 
  mutate(E_N = ifelse(E_N == "Lattitude", "lat", "lon"),
                coords = paste(...6, ...7, sep = "")) %>% 
  dplyr::select(E_N, coords, filename) %>% 
  filter(E_N == "lat") %>% 
  dplyr::rename(lat_minute = coords) %>% 
  dplyr::select(-E_N) %>% 
  mutate(lat = format.position(as.numeric(lat_minute)),
                filename = str_remove(filename, ".xlsx"))

q4_21_lat
# A tibble: 37 × 3
   lat_minute           filename                              lat
   <chr>                <chr>                               <dbl>
 1 5510.760999999999999 SBE19_6164_20211121_0617_77_10_0966  55.2
 2 5518.712             SBE19_6164_20211121_1113_77_10_0967  55.3
 3 5516.155999999999999 SBE19_6164_20211121_1314_77_10_0968  55.3
 4 5527.439             SBE19_6164_20211122_0555_77_10_0969  55.4
 5 5536.173000000000002 SBE19_6164_20211122_0859_77_10_0970  55.6
 6 5540.664000000000001 SBE19_6164_20211122_1122_77_10_0971  55.7
 7 5542.558             SBE19_6164_20211122_1411_77_10_0972  55.7
 8 5541.6               SBE19_6164_20211123_0553_77_10_0973  55.7
 9 5539.825000000000003 SBE19_6164_20211123_0922_77_10_0974  55.6
10 5538                 SBE19_6164_20211123_1125_77_10_0975  55.6
# ℹ 27 more rows
q4_21_lon <- purrr::map_df(filenames, 
                           ~read_excel(.x, skip = 20, col_types = "guess") %>% 
                             mutate(filename = .x)) %>% 
  dplyr::select(c(`Cruise:BITSQ42021`, "...6", "...7", "filename")) %>% 
  filter(`Cruise:BITSQ42021` %in% c("Lattitude", "Longitude")) %>% 
  dplyr::rename(E_N = 1) %>% 
  mutate(E_N = ifelse(E_N == "Lattitude", "lat", "lon"),
                coords = paste(...6, ...7, sep = "")) %>% 
  dplyr::select(E_N, coords, filename) %>% 
  filter(E_N == "lon") %>% 
  dplyr::rename(lon_minute = coords) %>% 
  dplyr::select(-E_N) %>% 
  mutate(lon = format.position(as.numeric(lon_minute)),
                filename = str_remove(filename, ".xlsx"))

q4_21_lon
# A tibble: 37 × 3
   lon_minute           filename                              lon
   <chr>                <chr>                               <dbl>
 1 1320.062999999999999 SBE19_6164_20211121_0617_77_10_0966  13.3
 2 1351.265999999999998 SBE19_6164_20211121_1113_77_10_0967  13.8
 3 1357.898000000000003 SBE19_6164_20211121_1314_77_10_0968  14.0
 4 1437.692             SBE19_6164_20211122_0555_77_10_0969  14.6
 5 1437.158000000000001 SBE19_6164_20211122_0859_77_10_0970  14.6
 6 1423.521999999999998 SBE19_6164_20211122_1122_77_10_0971  14.4
 7 1425.649000000000001 SBE19_6164_20211122_1411_77_10_0972  14.4
 8 1422.651             SBE19_6164_20211123_0553_77_10_0973  14.4
 9 1430.541             SBE19_6164_20211123_0922_77_10_0974  14.5
10 1440                 SBE19_6164_20211123_1125_77_10_0975  14.7
# ℹ 27 more rows
coords <- q4_21_lat %>% left_join(q4_21_lon, by = "filename") %>% dplyr::select(filename, lat, lon)
coords
# A tibble: 37 × 3
   filename                              lat   lon
   <chr>                               <dbl> <dbl>
 1 SBE19_6164_20211121_0617_77_10_0966  55.2  13.3
 2 SBE19_6164_20211121_1113_77_10_0967  55.3  13.8
 3 SBE19_6164_20211121_1314_77_10_0968  55.3  14.0
 4 SBE19_6164_20211122_0555_77_10_0969  55.4  14.6
 5 SBE19_6164_20211122_0859_77_10_0970  55.6  14.6
 6 SBE19_6164_20211122_1122_77_10_0971  55.7  14.4
 7 SBE19_6164_20211122_1411_77_10_0972  55.7  14.4
 8 SBE19_6164_20211123_0553_77_10_0973  55.7  14.4
 9 SBE19_6164_20211123_0922_77_10_0974  55.6  14.5
10 SBE19_6164_20211123_1125_77_10_0975  55.6  14.7
# ℹ 27 more rows
q4_21 <- q4_21 %>% left_join(coords, by = "filename")

Continuing with q1 2022…

# Q1 2022
setwd(paste0(home, "/data/bits-oxygen/22_02/"))

filenames <- list.files(pattern = "*.xlsx")

q1_22 <- purrr::map_df(filenames, 
                       ~read_excel(.x, skip = 208, col_types = "guess") %>% 
                         dplyr::select(3, 10, 12) %>% 
                         dplyr::rename(temp = 1,
                                       depth = 2, 
                                       oxy = 3) %>%
                         mutate(filename = .x,
                                temp = as.numeric(temp),
                                depth = as.numeric(depth),
                                oxy = as.numeric(oxy))) %>% 
  mutate(filename = str_remove(filename, ".xlsx"),
         q_year = paste(1, 2022, sep = "_"))

q1_22
# A tibble: 5,578 × 5
    temp depth   oxy filename                            q_year
   <dbl> <dbl> <dbl> <chr>                               <chr> 
 1  4.07  1.01  8.37 SBE19_6164_20220222_0610_77_10_0170 1_2022
 2  4.07  1.52  8.36 SBE19_6164_20220222_0610_77_10_0170 1_2022
 3  4.07  2.02  8.35 SBE19_6164_20220222_0610_77_10_0170 1_2022
 4  4.08  2.53  8.36 SBE19_6164_20220222_0610_77_10_0170 1_2022
 5  4.08  3.04  8.35 SBE19_6164_20220222_0610_77_10_0170 1_2022
 6  4.08  3.54  8.34 SBE19_6164_20220222_0610_77_10_0170 1_2022
 7  4.07  4.05  8.34 SBE19_6164_20220222_0610_77_10_0170 1_2022
 8  4.07  4.56  8.35 SBE19_6164_20220222_0610_77_10_0170 1_2022
 9  4.06  5.06  8.35 SBE19_6164_20220222_0610_77_10_0170 1_2022
10  4.06  5.57  8.35 SBE19_6164_20220222_0610_77_10_0170 1_2022
# ℹ 5,568 more rows
# Now I need to find the coordinates, and match them by the filename.
q1_22_lat <- purrr::map_df(filenames, 
                           ~read_excel(.x, skip = 20, col_types = "guess") %>% 
                             mutate(filename = .x)) %>% 
  dplyr::select(c(`Cruise:`, "...6", "...7", "filename")) %>% 
  filter(`Cruise:` %in% c("Lattitude", "Longitude")) %>% 
  dplyr::rename(E_N = 1) %>% 
  mutate(E_N = ifelse(E_N == "Lattitude", "lat", "lon"),
                coords = paste(...6, ...7, sep = "")) %>% 
  dplyr::select(E_N, coords, filename) %>% 
  filter(E_N == "lat") %>% 
  dplyr::rename(lat_minute = coords) %>% 
  dplyr::select(-E_N) %>% 
  mutate(lat = format.position(as.numeric(lat_minute)),
                filename = str_remove(filename, ".xlsx"))

q1_22_lat
# A tibble: 47 × 3
   lat_minute filename                              lat
   <chr>      <chr>                               <dbl>
 1 5541.566   SBE19_6164_20220222_0610_77_10_0170  55.7
 2 5537.281   SBE19_6164_20220222_1013_77_10_0171  55.6
 3 5549.913   SBE19_6164_20220222_1351_77_10_0172  55.8
 4 5550.740   SBE19_6164_20220222_1640_77_10_0173  55.8
 5 5541.217   SBE19_6164_20220223_0527_77_10_0174  55.7
 6 5540.675   SBE19_6164_20220223_0751_77_10_0175  55.7
 7 5540       SBE19_6164_20220223_1110_77_10_0176  55.7
 8 5548.412   SBE19_6164_20220223_1634_77_10_0177  55.8
 9 5510.832   SBE19_6164_20220224_0525_77_10_0178  55.2
10 5514.305   SBE19_6164_20220224_0906_77_10_0179  55.2
# ℹ 37 more rows
q1_22_lon <- purrr::map_df(filenames, 
                           ~read_excel(.x, skip = 20, col_types = "guess") %>% 
                             mutate(filename = .x)) %>% 
  dplyr::select(c(`Cruise:`, "...6", "...7", "filename")) %>% 
  filter(`Cruise:` %in% c("Lattitude", "Longitude")) %>% 
  dplyr::rename(E_N = 1) %>% 
  mutate(E_N = ifelse(E_N == "Lattitude", "lat", "lon"),
                coords = paste(...6, ...7, sep = "")) %>% 
  dplyr::select(E_N, coords, filename) %>% 
  filter(E_N == "lon") %>% 
  dplyr::rename(lon_minute = coords) %>% 
  dplyr::select(-E_N) %>% 
  mutate(lon = format.position(as.numeric(lon_minute)),
                filename = str_remove(filename, ".xlsx"))

q1_22_lon
# A tibble: 47 × 3
   lon_minute filename                              lon
   <chr>      <chr>                               <dbl>
 1 1424.237   SBE19_6164_20220222_0610_77_10_0170  14.4
 2 1445.683   SBE19_6164_20220222_1013_77_10_0171  14.8
 3 1517.850   SBE19_6164_20220222_1351_77_10_0172  15.3
 4 1534.110   SBE19_6164_20220222_1640_77_10_0173  15.6
 5 1423.249   SBE19_6164_20220223_0527_77_10_0174  14.4
 6 1425.381   SBE19_6164_20220223_0751_77_10_0175  14.4
 7 1446.195   SBE19_6164_20220223_1110_77_10_0176  14.8
 8 1558.088   SBE19_6164_20220223_1634_77_10_0177  16.0
 9 1319.256   SBE19_6164_20220224_0525_77_10_0178  13.3
10 1339.589   SBE19_6164_20220224_0906_77_10_0179  13.6
# ℹ 37 more rows
coords <- q1_22_lat %>% left_join(q1_22_lon, by = "filename") %>% dplyr::select(filename, lat, lon)
coords
# A tibble: 47 × 3
   filename                              lat   lon
   <chr>                               <dbl> <dbl>
 1 SBE19_6164_20220222_0610_77_10_0170  55.7  14.4
 2 SBE19_6164_20220222_1013_77_10_0171  55.6  14.8
 3 SBE19_6164_20220222_1351_77_10_0172  55.8  15.3
 4 SBE19_6164_20220222_1640_77_10_0173  55.8  15.6
 5 SBE19_6164_20220223_0527_77_10_0174  55.7  14.4
 6 SBE19_6164_20220223_0751_77_10_0175  55.7  14.4
 7 SBE19_6164_20220223_1110_77_10_0176  55.7  14.8
 8 SBE19_6164_20220223_1634_77_10_0177  55.8  16.0
 9 SBE19_6164_20220224_0525_77_10_0178  55.2  13.3
10 SBE19_6164_20220224_0906_77_10_0179  55.2  13.6
# ℹ 37 more rows
q1_22 <- q1_22 %>% left_join(coords, by = "filename")

Final dataset! q4 2022… And of course, here the CTD can spell to latitude again. And, the coordinates again end up in a different string… And best of all, half the files have coordinates spanning over 3 columns, other two…

# Q4 2022
setwd(paste0(home, "/data/bits-oxygen/22_11/"))

filenames <- list.files(pattern = "*.xlsx")

q4_22 <- purrr::map_df(filenames, 
                       ~read_excel(.x, skip = 208, col_types = "guess") %>% 
                         dplyr::select(3, 10, 12) %>% 
                         dplyr::rename(temp = 1,
                                       depth = 2, 
                                       oxy = 3) %>%
                         mutate(filename = .x,
                                temp = as.numeric(temp),
                                depth = as.numeric(depth),
                                oxy = as.numeric(oxy))) %>% 
  mutate(filename = str_remove(filename, ".xlsx"),
         q_year = paste(4, 2022, sep = "_"))

q4_22
# A tibble: 4,316 × 5
    temp depth   oxy filename                            q_year
   <dbl> <dbl> <dbl> <chr>                               <chr> 
 1  8.43  2.03  7.36 SBE19_6164_20221118_0821_77_10_0979 4_2022
 2  8.43  2.53  7.37 SBE19_6164_20221118_0821_77_10_0979 4_2022
 3  8.43  3.04  7.36 SBE19_6164_20221118_0821_77_10_0979 4_2022
 4  8.43  3.55  7.36 SBE19_6164_20221118_0821_77_10_0979 4_2022
 5  8.43  4.05  7.35 SBE19_6164_20221118_0821_77_10_0979 4_2022
 6  8.43  4.56  7.35 SBE19_6164_20221118_0821_77_10_0979 4_2022
 7  8.43  5.07  7.35 SBE19_6164_20221118_0821_77_10_0979 4_2022
 8  8.43  5.57  7.36 SBE19_6164_20221118_0821_77_10_0979 4_2022
 9  8.43  6.08  7.36 SBE19_6164_20221118_0821_77_10_0979 4_2022
10  8.43  6.58  7.35 SBE19_6164_20221118_0821_77_10_0979 4_2022
# ℹ 4,306 more rows
# Now I need to find the coordinates, and match them by the filename.
# The latter half have colnames spanning 3 columns, the first half only two... 

filenames_a <- filenames[1:18] # Look at this indexing... 
  
q4_22_lat_a <- purrr::map_df(filenames_a, 
                             ~read_excel(.x, skip = 20, col_types = "guess") %>% 
                             mutate(filename = .x)) %>% 
  dplyr::select(c(`Cruise:`, "BITS", "...4", "filename")) %>% # not the difference in where the coordinates end up
  filter(`Cruise:` %in% c("Latitude:", "Longitude:")) %>% 
  dplyr::rename(E_N = 1) %>% 
  mutate(E_N = ifelse(E_N == "Latitude:", "lat", "lon"),
         coords = paste(BITS, ...4, sep = "")) %>% 
  dplyr::select(E_N, coords, filename) %>% 
  filter(E_N == "lat") %>% 
  dplyr::rename(lat_minute = coords) %>% 
  dplyr::select(-E_N) %>% 
  mutate(lat = format.position(as.numeric(lat_minute)),
                filename = str_remove(filename, ".xlsx"))

q4_22_lat_a
# A tibble: 18 × 3
   lat_minute           filename                              lat
   <chr>                <chr>                               <dbl>
 1 5723.161999999999999 SBE19_6164_20221118_0821_77_10_0979  57.4
 2 5726.398             SBE19_6164_20221118_1027_77_10_0980  57.4
 3 5717.711             SBE19_6164_20221118_1512_77_10_0982  57.3
 4 5539.884             SBE19_6164_20221119_0536_77_10_0983  55.6
 5 5539.884             SBE19_6164_20221119_0550_77_10_LERR  55.6
 6 5547.846             SBE19_6164_20221119_1006_77_10_0984  55.8
 7 5550.565             SBE19_6164_20221119_1209_77_10_0985  55.8
 8 5550.858             SBE19_6164_20221119_1401_77_10_0986  55.8
 9 5511.166             SBE19_6164_20221120_0542_77_10_0987  55.2
10 5516.307             SBE19_6164_20221120_1047_77_10_0988  55.3
11 5515.930             SBE19_6164_20221120_1303_77_10_0989  55.2
12 5553.184             SBE19_6164_20221121_0538_77_10_0991  55.9
13 5549.433             SBE19_6164_20221121_0931_77_10_0992  55.8
14 5547.467             SBE19_6164_20221121_1150_77_10_0993  55.8
15 5545.869             SBE19_6164_20221121_1400_77_10_0994  55.8
16 5703.794             SBE19_6164_20221122_0801_77_10_0995  57.0
17 5705.684             SBE19_6164_20221122_0850_77_10_0996  57.1
18 5710.590             SBE19_6164_20221122_1139_77_10_0997  57.2
filenames_b <- filenames[19:length(filenames)]
  
q4_22_lat_b <- purrr::map_df(filenames_b, 
                             ~read_excel(.x, skip = 20, col_types = "guess") %>% 
                             mutate(filename = .x)) %>% 
  dplyr::select(c(`Cruise:`, "BITS", "...4", "...5", "filename")) %>% # not the difference in where the coordinates end up
  filter(`Cruise:` %in% c("Latitude:", "Longitude:")) %>% 
  dplyr::rename(E_N = 1) %>% 
  mutate(E_N = ifelse(E_N == "Latitude:", "lat", "lon"),
         coords = paste(BITS, paste(...4, ...5, sep = "."), sep = "")) %>% 
  dplyr::select(E_N, coords, filename) %>% 
  filter(E_N == "lat") %>% 
  dplyr::rename(lat_minute = coords) %>% 
  dplyr::select(-E_N) %>% 
  mutate(lat_minute = str_remove(lat_minute, "\\.N")) %>%  # Note this
  mutate(lat = format.position(as.numeric(lat_minute)),
                filename = str_remove(filename, ".xlsx"))

q4_22_lat_b
# A tibble: 17 × 3
   lat_minute filename                              lat
   <chr>      <chr>                               <dbl>
 1 5714.149   SBE19_6164_20221122_1241_77_10_0998  57.2
 2 5721.94    SBE19_6164_20221122_1506_77_10_0999  57.4
 3 5720.885   SBE19_6164_20221122_1546_77_10_1000  57.3
 4 5750.759   SBE19_6164_20221123_0554_77_10_1001  57.8
 5 5754.319   SBE19_6164_20221123_0934_77_10_1002  57.9
 6 582.551    SBE19_6164_20221123_1051_77_10_1003  58.0
 7 5737.706   SBE19_6164_20221124_0506_77_10_1004  57.6
 8 575.733    SBE19_6164_20221124_1408_77_10_1006  57.1
 9 572.523    SBE19_6164_20221124_1642_77_10_1006  57.0
10 5658.380   SBE19_6164_20221125_0550_77_10_1008  57.0
11 5716.62    SBE19_6164_20221125_1112_77_10_1009  57.3
12 5654.765   SBE19_6164_20221126_0556_77_10_1010  56.9
13 5641.639   SBE19_6164_20221126_1033_77_10_1012  56.7
14 5630.65    SBE19_6164_20221126_1315_77_10_1013  56.5
15 5527.672   SBE19_6164_20221127_0505_77_10_1014  55.4
16 5541.535   SBE19_6164_20221127_1041_77_10_1015  55.7
17 5541.143   SBE19_6164_20221127_1409_77_10_1017  55.7
# Here's how I found how to split
# : it doesn't split all excel-files in the same way, so I can't extract the coordinates properly...
# t <- q4_22_lat %>% filter(E_N == "lat")
# unique(t$BITS)
# unique(t$...4)
# t %>% filter(...4 == 21)
# t %>% filter(...4 == 26.398)

q4_22_lon_a <- purrr::map_df(filenames_a, 
                             ~read_excel(.x, skip = 20, col_types = "guess") %>% 
                             mutate(filename = .x)) %>% 
  dplyr::select(c(`Cruise:`, "BITS", "...4", "filename")) %>% # not the difference in where the coordinates end up
  filter(`Cruise:` %in% c("Latitude:", "Longitude:")) %>% 
  dplyr::rename(E_N = 1) %>% 
  mutate(E_N = ifelse(E_N == "Latitude:", "lat", "lon"),
         coords = paste(BITS, ...4, sep = "")) %>% 
  dplyr::select(E_N, coords, filename) %>% 
  filter(E_N == "lon") %>% 
  dplyr::rename(lon_minute = coords) %>% 
  dplyr::select(-E_N) %>% 
  mutate(lon = format.position(as.numeric(lon_minute)),
         filename = str_remove(filename, ".xlsx"))

q4_22_lon_a
# A tibble: 18 × 3
   lon_minute filename                              lon
   <chr>      <chr>                               <dbl>
 1 1654.673   SBE19_6164_20221118_0821_77_10_0979  16.9
 2 1658.574   SBE19_6164_20221118_1027_77_10_0980  17.0
 3 1725.705   SBE19_6164_20221118_1512_77_10_0982  17.4
 4 1612.837   SBE19_6164_20221119_0536_77_10_0983  16.2
 5 1612.837   SBE19_6164_20221119_0550_77_10_LERR  16.2
 6 1558.516   SBE19_6164_20221119_1006_77_10_0984  16.0
 7 1600.002   SBE19_6164_20221119_1209_77_10_0985  16  
 8 1604.391   SBE19_6164_20221119_1401_77_10_0986  16.1
 9 1335.007   SBE19_6164_20221120_0542_77_10_0987  13.6
10 1344.758   SBE19_6164_20221120_1047_77_10_0988  13.7
11 1354.338   SBE19_6164_20221120_1303_77_10_0989  13.9
12 1555.570   SBE19_6164_20221121_0538_77_10_0991  15.9
13 1529.059   SBE19_6164_20221121_0931_77_10_0992  15.5
14 1515.854   SBE19_6164_20221121_1150_77_10_0993  15.2
15 1529.996   SBE19_6164_20221121_1400_77_10_0994  15.5
16 1851.438   SBE19_6164_20221122_0801_77_10_0995  18.8
17 1857.582   SBE19_6164_20221122_0850_77_10_0996  19.0
18 1854.918   SBE19_6164_20221122_1139_77_10_0997  18.9
q4_22_lon_b <- purrr::map_df(filenames_b, 
                             ~read_excel(.x, skip = 20, col_types = "guess") %>% 
                             mutate(filename = .x)) %>% 
  dplyr::select(c(`Cruise:`, "BITS", "...4", "...5", "filename")) %>% # not the difference in where the coordinates end up
  filter(`Cruise:` %in% c("Latitude:", "Longitude:")) %>% 
  dplyr::rename(E_N = 1) %>% 
  mutate(E_N = ifelse(E_N == "Latitude:", "lat", "lon"),
         coords = paste(BITS, paste(...4, ...5, sep = "."), sep = "")) %>% 
  dplyr::select(E_N, coords, filename) %>% 
  filter(E_N == "lon") %>% 
  dplyr::rename(lon_minute = coords) %>% 
  dplyr::select(-E_N) %>% 
  mutate(lon_minute = str_remove(lon_minute, "\\.E")) %>%  # Note this
  mutate(lon = format.position(as.numeric(lon_minute)),
         filename = str_remove(filename, ".xlsx"))

q4_22_lon_b
# A tibble: 17 × 3
   lon_minute filename                              lon
   <chr>      <chr>                               <dbl>
 1 1904.928   SBE19_6164_20221122_1241_77_10_0998  19.1
 2 196.691    SBE19_6164_20221122_1506_77_10_0999  19.1
 3 199.923    SBE19_6164_20221122_1546_77_10_1000  19.2
 4 1929.939   SBE19_6164_20221123_0554_77_10_1001  19.5
 5 1927.262   SBE19_6164_20221123_0934_77_10_1002  19.4
 6 1930.352   SBE19_6164_20221123_1051_77_10_1003  19.5
 7 177.876    SBE19_6164_20221124_0506_77_10_1004  17.1
 8 1720.160   SBE19_6164_20221124_1408_77_10_1006  17.3
 9 1751.198   SBE19_6164_20221124_1642_77_10_1006  17.8
10 1755.73    SBE19_6164_20221125_0550_77_10_1008  17.9
11 1726.150   SBE19_6164_20221125_1112_77_10_1009  17.4
12 1711.312   SBE19_6164_20221126_0556_77_10_1010  17.2
13 171.53     SBE19_6164_20221126_1033_77_10_1012  17.0
14 1656.90    SBE19_6164_20221126_1315_77_10_1013  16.9
15 1433.258   SBE19_6164_20221127_0505_77_10_1014  14.6
16 1422.920   SBE19_6164_20221127_1041_77_10_1015  14.4
17 1429.208   SBE19_6164_20221127_1409_77_10_1017  14.5
q4_22_lat <- bind_rows(q4_22_lat_a, q4_22_lat_b)
q4_22_lon <- bind_rows(q4_22_lon_a, q4_22_lon_b)

coords <- q4_22_lat %>% left_join(q4_22_lon, by = "filename") %>% dplyr::select(filename, lat, lon)
coords
# A tibble: 35 × 3
   filename                              lat   lon
   <chr>                               <dbl> <dbl>
 1 SBE19_6164_20221118_0821_77_10_0979  57.4  16.9
 2 SBE19_6164_20221118_1027_77_10_0980  57.4  17.0
 3 SBE19_6164_20221118_1512_77_10_0982  57.3  17.4
 4 SBE19_6164_20221119_0536_77_10_0983  55.6  16.2
 5 SBE19_6164_20221119_0550_77_10_LERR  55.6  16.2
 6 SBE19_6164_20221119_1006_77_10_0984  55.8  16.0
 7 SBE19_6164_20221119_1209_77_10_0985  55.8  16  
 8 SBE19_6164_20221119_1401_77_10_0986  55.8  16.1
 9 SBE19_6164_20221120_0542_77_10_0987  55.2  13.6
10 SBE19_6164_20221120_1047_77_10_0988  55.3  13.7
# ℹ 25 more rows
q4_22 <- q4_22 %>% left_join(coords, by = "filename")

# Drop strange coords for now
hist(q4_22$lat)

hist(q4_22$lon)

q4_22 <- q4_22 %>% filter(lon > 10)

Combine all oxygen CTD’s with the matching key. Then summarise them by key, year and quarter before joining.

Note also the depth issue here. Seems like it’s bimodal, with unrealistic values that seem to be one digit too large!?

oxy_with_key <- bind_rows(oxy_old_ctd, # multiple file data with serial no
                          q4_19, q1_20) %>% # single file data with serial no
  mutate(smhi_serial_no_all = paste(q_year, smhi_serial_no, sep = "_")) 

# Summarise data before joining with trawl data. First do it on the depth data.
oxy_with_key_trim_depth <- oxy_with_key %>% 
  filter(!is.na(depth)) %>% 
  group_by(smhi_serial_no_all) %>% 
  mutate(ctd_depth = ifelse(depth > max(depth) -3, "near bottom", "pelagic")) %>% 
  ungroup() %>% 
  filter(ctd_depth == "near bottom") %>% 
  group_by(smhi_serial_no_all) %>% 
  summarise(oxy = mean(oxy),
            depth = mean(depth),
            temp = mean(temp))

oxy_with_key_trim_depth
# A tibble: 256 × 4
   smhi_serial_no_all   oxy depth  temp
   <chr>              <dbl> <dbl> <dbl>
 1 1_2016_1            7.53  59.9  3.72
 2 1_2016_11           4.51  50.5  7.34
 3 1_2016_13           8.12  40.6  3.92
 4 1_2016_15           8.07  46.6  4.13
 5 1_2016_17           6.39  56.0  4.47
 6 1_2016_19           4.02  56.5  4.97
 7 1_2016_22           3.72  62.9  4.87
 8 1_2016_24           8.40  39.6  3.58
 9 1_2016_26           1.88  68.9  5.29
10 1_2016_27           2.70 108.   7.83
# ℹ 246 more rows
oxy_with_key_trim_depth_NA <- oxy_with_key %>% 
  filter(is.na(depth)) %>% 
  group_by(smhi_serial_no_all) %>% 
  filter(pressue_bar == max(pressue_bar)) %>% 
  group_by(smhi_serial_no_all)

oxy_with_key_trim_depth_NA
# A tibble: 78 × 10
# Groups:   smhi_serial_no_all [74]
    temp   oxy depth   lat   lon filename q_year smhi_serial_no pressue_bar
   <dbl> <dbl> <dbl> <dbl> <dbl> <chr>    <chr>           <dbl>       <dbl>
 1 12.0  0.964    NA    NA    NA <NA>     4_2019             64        71.2
 2 12.9  2.40     NA    NA    NA <NA>     4_2019             65        59.0
 3  8.85 6.22     NA    NA    NA <NA>     4_2019             66        38.6
 4  8.84 0        NA    NA    NA <NA>     4_2019             67        38.6
 5  8.84 0        NA    NA    NA <NA>     4_2019             67        38.6
 6  9.09 5.78     NA    NA    NA <NA>     4_2019             68        49.4
 7  8.74 6.40     NA    NA    NA <NA>     4_2019             69        40.6
 8  5.58 0.361    NA    NA    NA <NA>     4_2019             70        76.6
 9  5.67 0.152    NA    NA    NA <NA>     4_2019             71       101. 
10  5.18 1.14     NA    NA    NA <NA>     4_2019             72        72.2
# ℹ 68 more rows
# ℹ 1 more variable: smhi_serial_no_all <chr>
oxy_with_key_trim_join <- bind_rows(oxy_with_key_trim_depth,
                                    oxy_with_key_trim_depth_NA) %>% 
  dplyr::select(smhi_serial_no_all, temp, oxy, depth)

oxy_with_key_trim_join
# A tibble: 334 × 4
   smhi_serial_no_all  temp   oxy depth
   <chr>              <dbl> <dbl> <dbl>
 1 1_2016_1            3.72  7.53  59.9
 2 1_2016_11           7.34  4.51  50.5
 3 1_2016_13           3.92  8.12  40.6
 4 1_2016_15           4.13  8.07  46.6
 5 1_2016_17           4.47  6.39  56.0
 6 1_2016_19           4.97  4.02  56.5
 7 1_2016_22           4.87  3.72  62.9
 8 1_2016_24           3.58  8.40  39.6
 9 1_2016_26           5.29  1.88  68.9
10 1_2016_27           7.83  2.70 108. 
# ℹ 324 more rows
# Join with data!
d_with_key <- d %>% filter(q_year %in% c(unique(oxy_with_key$q_year)))

d_with_key <- d_with_key %>% left_join(oxy_with_key_trim_join, by = "smhi_serial_no_all")

# Which ones are missing?
d_with_key %>% 
  filter(is.na(oxy))
       lon      lat        X        Y year quarter smhi_serial_no   haul_id
1 14.65000 55.46667 477.8717 6146.779 2017       4              3  2017_4_4
2 14.81667 55.68333 488.4726 6170.852 2018       1             11 2018_1_12
3 15.25000 55.76667 515.6857 6180.139 2018       1             16 2018_1_15
4 15.38333 55.75000 524.0617 6178.322 2018       1             17 2018_1_18
5 15.78333 55.63333 549.3158 6165.550 2018       1             20 2018_1_19
6 14.36667 55.70000 460.1953 6172.873 2020       1            142 2020_1_91
  bottom_depth q_year smhi_serial_no_all temp oxy depth
1           67 4_2017           4_2017_3   NA  NA    NA
2           59 1_2018          1_2018_11   NA  NA    NA
3           56 1_2018          1_2018_16   NA  NA    NA
4           61 1_2018          1_2018_17   NA  NA    NA
5           68 1_2018          1_2018_20   NA  NA    NA
6           38 1_2020         1_2020_142   NA  NA    NA
# Don't know why these are missing but we will move on. 

# Next we can check how close to the bottom our average is. Before doing that, check depth again.
d_with_key %>% 
  mutate(depth2 = ifelse(bottom_depth > 250, "deeper than 250 m", "normal")) %>% 
  ggplot(aes(lon, lat, color = as.factor(year), shape = as.factor(quarter))) +
  geom_point() + 
  facet_wrap(~ depth2, ncol = 1) + 
  coord_sf()

d_with_key %>% 
  mutate(depth2 = ifelse(bottom_depth > 250, "deeper than 250 m", "normal")) %>% 
  group_by(year, depth2) %>% 
  summarise(n = n()) %>% 
  ggplot(aes(year, n, fill = depth2)) + 
  geom_bar(stat = "identity")

# I am going to assume that if depth is deeper than 250, I divide by 10, because in script 01, this gives a very good fit to raster-derived depth data
d_with_key %>% 
  drop_na(depth) %>% # These are the ones without any depth info
  mutate(bottom_depth2 = ifelse(bottom_depth > 250, bottom_depth/10, bottom_depth),
         depth_diff = bottom_depth2 - depth) %>% 
  ggplot(aes(smhi_serial_no_all, depth_diff)) + 
  geom_point()

Even though correcting for the depth issue and looking only at data where we have depth from trawl and from CTD, we can still see clearly that the deepest recorded CTD data is not always near the bottom.

Now do the ones with missing keys where we have to match by spatial location.

oxy_without_key <- bind_rows(q4_20, # single file data without serial
                             q1_21, q4_21, q1_22, q4_22)  # multiple file data without serial 


oxy_without_key <- oxy_without_key %>%
  filter(lon > 5)

# Summarize data!
oxy_without_key_trim <- oxy_without_key %>% 
  mutate(id = paste(lat, lon, q_year, sep = "_")) %>%  # remember we can't use smhi serial no here because of missing values
  group_by(id) %>% 
  mutate(ctd_depth = ifelse(depth > max(depth) -3, "near bottom", "pelagic")) %>% 
  ungroup() %>% 
  filter(ctd_depth == "near bottom") %>% 
  group_by(id) %>% 
  summarise(oxy = mean(oxy),
                   depth = mean(depth),
                   temp = mean(temp))


# https://stackoverflow.com/questions/59621797/evaluating-the-closest-distance-from-one-point-between-multiple-options
trawl_sf <- d %>%
  # Filter q_years in the no-key ctd data
  filter(q_year %in% c(unique(oxy_without_key$q_year))) %>% 
  st_as_sf(coords = c("lon", "lat"), remove = FALSE) %>%
  st_set_crs(4326) 

ctd_sf <- oxy_without_key_trim %>%
  separate(id, sep = "_", into = c("lat", "lon", "quarter", "year"), convert = TRUE) %>% 
  mutate(q_year = paste(quarter, year, sep = "_")) %>% 
  drop_na(lat) %>%
  drop_na(lon) %>% 
  st_as_sf(coords = c("lon", "lat"), remove = FALSE) %>%
  st_set_crs(4326) 


lis <- list()

for(i in unique(oxy_without_key$q_year)){
  
  trawl_sf_i <- filter(trawl_sf, q_year == i)
  ctd_sf_i <- filter(ctd_sf, q_year == i)
  
  df_near <- st_join(trawl_sf_i, ctd_sf_i, join = st_nearest_feature)

  # Check distance in metres
  dist <- trawl_sf_i %>%
    cbind(
      ctd_sf_i[st_nearest_feature(trawl_sf_i, ctd_sf_i),]) %>% 
    mutate(dist = st_distance(geometry, geometry.1, by_element = T)) %>% 
    arrange(desc(dist)) %>% 
    as.data.frame()
  
  dist
  
  # Plot on map
  print(st_connect(trawl_sf_i, ctd_sf_i) %>%
    mapview::mapview() +
    mapview::mapview(trawl_sf, color = 'tomato', col.regions = 'tomato') + 
    mapview::mapview(ctd_sf, color = 'steelblue', col.regions = 'steelblue'))
  
  # Here we can see which values of CTD are matched to trawl data
  print(ggplot() + 
    geom_sf(data = trawl_sf, aes(color = "trawl"), size = 3, alpha = .6) +
    geom_sf(data = ctd_sf, aes(color = "ctd"), alpha = .6) +
    scale_color_manual(values = c("steelblue", "tomato"), name = "Data type") + 
    geom_sf(data = st_connect(trawl_sf, ctd_sf), linewidth = 0.2))
  
  lis[[i]] <- df_near %>%
    as.data.frame() %>%
    dplyr::select(haul_id, oxy, depth, temp) %>% 
    mutate(q_year = i,
           dist_m = as.numeric(str_remove(dist$dist, " [m]")))
 
}
Calculating nearest IDs

  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |====                                                                  |   6%
  |                                                                            
  |========                                                              |  12%
  |                                                                            
  |============                                                          |  18%
  |                                                                            
  |================                                                      |  24%
  |                                                                            
  |=====================                                                 |  29%
  |                                                                            
  |=========================                                             |  35%
  |                                                                            
  |=============================                                         |  41%
  |                                                                            
  |=================================                                     |  47%
  |                                                                            
  |=====================================                                 |  53%
  |                                                                            
  |=========================================                             |  59%
  |                                                                            
  |=============================================                         |  65%
  |                                                                            
  |=================================================                     |  71%
  |                                                                            
  |======================================================                |  76%
  |                                                                            
  |==========================================================            |  82%
  |                                                                            
  |==============================================================        |  88%
  |                                                                            
  |==================================================================    |  94%
  |                                                                            
  |======================================================================| 100%
Calculating lines
Calculating nearest IDs

  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |=                                                                     |   1%
  |                                                                            
  |==                                                                    |   2%
  |                                                                            
  |==                                                                    |   3%
  |                                                                            
  |===                                                                   |   4%
  |                                                                            
  |====                                                                  |   5%
  |                                                                            
  |====                                                                  |   6%
  |                                                                            
  |=====                                                                 |   7%
  |                                                                            
  |======                                                                |   8%
  |                                                                            
  |======                                                                |   9%
  |                                                                            
  |=======                                                               |  10%
  |                                                                            
  |========                                                              |  11%
  |                                                                            
  |========                                                              |  12%
  |                                                                            
  |=========                                                             |  13%
  |                                                                            
  |==========                                                            |  14%
  |                                                                            
  |==========                                                            |  15%
  |                                                                            
  |===========                                                           |  16%
  |                                                                            
  |============                                                          |  17%
  |                                                                            
  |=============                                                         |  18%
  |                                                                            
  |=============                                                         |  19%
  |                                                                            
  |==============                                                        |  19%
  |                                                                            
  |==============                                                        |  20%
  |                                                                            
  |===============                                                       |  21%
  |                                                                            
  |===============                                                       |  22%
  |                                                                            
  |================                                                      |  22%
  |                                                                            
  |================                                                      |  23%
  |                                                                            
  |=================                                                     |  24%
  |                                                                            
  |=================                                                     |  25%
  |                                                                            
  |==================                                                    |  25%
  |                                                                            
  |==================                                                    |  26%
  |                                                                            
  |===================                                                   |  27%
  |                                                                            
  |===================                                                   |  28%
  |                                                                            
  |====================                                                  |  28%
  |                                                                            
  |====================                                                  |  29%
  |                                                                            
  |=====================                                                 |  30%
  |                                                                            
  |=====================                                                 |  31%
  |                                                                            
  |======================                                                |  31%
  |                                                                            
  |======================                                                |  32%
  |                                                                            
  |=======================                                               |  33%
  |                                                                            
  |========================                                              |  34%
  |                                                                            
  |=========================                                             |  35%
  |                                                                            
  |=========================                                             |  36%
  |                                                                            
  |==========================                                            |  37%
  |                                                                            
  |===========================                                           |  38%
  |                                                                            
  |===========================                                           |  39%
  |                                                                            
  |============================                                          |  40%
  |                                                                            
  |=============================                                         |  41%
  |                                                                            
  |=============================                                         |  42%
  |                                                                            
  |==============================                                        |  43%
  |                                                                            
  |===============================                                       |  44%
  |                                                                            
  |===============================                                       |  45%
  |                                                                            
  |================================                                      |  46%
  |                                                                            
  |=================================                                     |  47%
  |                                                                            
  |=================================                                     |  48%
  |                                                                            
  |==================================                                    |  49%
  |                                                                            
  |===================================                                   |  50%
  |                                                                            
  |====================================                                  |  51%
  |                                                                            
  |=====================================                                 |  52%
  |                                                                            
  |=====================================                                 |  53%
  |                                                                            
  |======================================                                |  54%
  |                                                                            
  |=======================================                               |  55%
  |                                                                            
  |=======================================                               |  56%
  |                                                                            
  |========================================                              |  57%
  |                                                                            
  |=========================================                             |  58%
  |                                                                            
  |=========================================                             |  59%
  |                                                                            
  |==========================================                            |  60%
  |                                                                            
  |===========================================                           |  61%
  |                                                                            
  |===========================================                           |  62%
  |                                                                            
  |============================================                          |  63%
  |                                                                            
  |=============================================                         |  64%
  |                                                                            
  |=============================================                         |  65%
  |                                                                            
  |==============================================                        |  66%
  |                                                                            
  |===============================================                       |  67%
  |                                                                            
  |================================================                      |  68%
  |                                                                            
  |================================================                      |  69%
  |                                                                            
  |=================================================                     |  69%
  |                                                                            
  |=================================================                     |  70%
  |                                                                            
  |==================================================                    |  71%
  |                                                                            
  |==================================================                    |  72%
  |                                                                            
  |===================================================                   |  72%
  |                                                                            
  |===================================================                   |  73%
  |                                                                            
  |====================================================                  |  74%
  |                                                                            
  |====================================================                  |  75%
  |                                                                            
  |=====================================================                 |  75%
  |                                                                            
  |=====================================================                 |  76%
  |                                                                            
  |======================================================                |  77%
  |                                                                            
  |======================================================                |  78%
  |                                                                            
  |=======================================================               |  78%
  |                                                                            
  |=======================================================               |  79%
  |                                                                            
  |========================================================              |  80%
  |                                                                            
  |========================================================              |  81%
  |                                                                            
  |=========================================================             |  81%
  |                                                                            
  |=========================================================             |  82%
  |                                                                            
  |==========================================================            |  83%
  |                                                                            
  |===========================================================           |  84%
  |                                                                            
  |============================================================          |  85%
  |                                                                            
  |============================================================          |  86%
  |                                                                            
  |=============================================================         |  87%
  |                                                                            
  |==============================================================        |  88%
  |                                                                            
  |==============================================================        |  89%
  |                                                                            
  |===============================================================       |  90%
  |                                                                            
  |================================================================      |  91%
  |                                                                            
  |================================================================      |  92%
  |                                                                            
  |=================================================================     |  93%
  |                                                                            
  |==================================================================    |  94%
  |                                                                            
  |==================================================================    |  95%
  |                                                                            
  |===================================================================   |  96%
  |                                                                            
  |====================================================================  |  97%
  |                                                                            
  |====================================================================  |  98%
  |                                                                            
  |===================================================================== |  99%
  |                                                                            
  |======================================================================| 100%
Calculating lines

Calculating nearest IDs

  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |==                                                                    |   2%
  |                                                                            
  |===                                                                   |   5%
  |                                                                            
  |=====                                                                 |   7%
  |                                                                            
  |=======                                                               |  10%
  |                                                                            
  |=========                                                             |  12%
  |                                                                            
  |==========                                                            |  15%
  |                                                                            
  |============                                                          |  17%
  |                                                                            
  |==============                                                        |  20%
  |                                                                            
  |===============                                                       |  22%
  |                                                                            
  |=================                                                     |  24%
  |                                                                            
  |===================                                                   |  27%
  |                                                                            
  |====================                                                  |  29%
  |                                                                            
  |======================                                                |  32%
  |                                                                            
  |========================                                              |  34%
  |                                                                            
  |==========================                                            |  37%
  |                                                                            
  |===========================                                           |  39%
  |                                                                            
  |=============================                                         |  41%
  |                                                                            
  |===============================                                       |  44%
  |                                                                            
  |================================                                      |  46%
  |                                                                            
  |==================================                                    |  49%
  |                                                                            
  |====================================                                  |  51%
  |                                                                            
  |======================================                                |  54%
  |                                                                            
  |=======================================                               |  56%
  |                                                                            
  |=========================================                             |  59%
  |                                                                            
  |===========================================                           |  61%
  |                                                                            
  |============================================                          |  63%
  |                                                                            
  |==============================================                        |  66%
  |                                                                            
  |================================================                      |  68%
  |                                                                            
  |==================================================                    |  71%
  |                                                                            
  |===================================================                   |  73%
  |                                                                            
  |=====================================================                 |  76%
  |                                                                            
  |=======================================================               |  78%
  |                                                                            
  |========================================================              |  80%
  |                                                                            
  |==========================================================            |  83%
  |                                                                            
  |============================================================          |  85%
  |                                                                            
  |=============================================================         |  88%
  |                                                                            
  |===============================================================       |  90%
  |                                                                            
  |=================================================================     |  93%
  |                                                                            
  |===================================================================   |  95%
  |                                                                            
  |====================================================================  |  98%
  |                                                                            
  |======================================================================| 100%
Calculating lines
Calculating nearest IDs

  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |=                                                                     |   1%
  |                                                                            
  |==                                                                    |   2%
  |                                                                            
  |==                                                                    |   3%
  |                                                                            
  |===                                                                   |   4%
  |                                                                            
  |====                                                                  |   5%
  |                                                                            
  |====                                                                  |   6%
  |                                                                            
  |=====                                                                 |   7%
  |                                                                            
  |======                                                                |   8%
  |                                                                            
  |======                                                                |   9%
  |                                                                            
  |=======                                                               |  10%
  |                                                                            
  |========                                                              |  11%
  |                                                                            
  |========                                                              |  12%
  |                                                                            
  |=========                                                             |  13%
  |                                                                            
  |==========                                                            |  14%
  |                                                                            
  |==========                                                            |  15%
  |                                                                            
  |===========                                                           |  16%
  |                                                                            
  |============                                                          |  17%
  |                                                                            
  |=============                                                         |  18%
  |                                                                            
  |=============                                                         |  19%
  |                                                                            
  |==============                                                        |  19%
  |                                                                            
  |==============                                                        |  20%
  |                                                                            
  |===============                                                       |  21%
  |                                                                            
  |===============                                                       |  22%
  |                                                                            
  |================                                                      |  22%
  |                                                                            
  |================                                                      |  23%
  |                                                                            
  |=================                                                     |  24%
  |                                                                            
  |=================                                                     |  25%
  |                                                                            
  |==================                                                    |  25%
  |                                                                            
  |==================                                                    |  26%
  |                                                                            
  |===================                                                   |  27%
  |                                                                            
  |===================                                                   |  28%
  |                                                                            
  |====================                                                  |  28%
  |                                                                            
  |====================                                                  |  29%
  |                                                                            
  |=====================                                                 |  30%
  |                                                                            
  |=====================                                                 |  31%
  |                                                                            
  |======================                                                |  31%
  |                                                                            
  |======================                                                |  32%
  |                                                                            
  |=======================                                               |  33%
  |                                                                            
  |========================                                              |  34%
  |                                                                            
  |=========================                                             |  35%
  |                                                                            
  |=========================                                             |  36%
  |                                                                            
  |==========================                                            |  37%
  |                                                                            
  |===========================                                           |  38%
  |                                                                            
  |===========================                                           |  39%
  |                                                                            
  |============================                                          |  40%
  |                                                                            
  |=============================                                         |  41%
  |                                                                            
  |=============================                                         |  42%
  |                                                                            
  |==============================                                        |  43%
  |                                                                            
  |===============================                                       |  44%
  |                                                                            
  |===============================                                       |  45%
  |                                                                            
  |================================                                      |  46%
  |                                                                            
  |=================================                                     |  47%
  |                                                                            
  |=================================                                     |  48%
  |                                                                            
  |==================================                                    |  49%
  |                                                                            
  |===================================                                   |  50%
  |                                                                            
  |====================================                                  |  51%
  |                                                                            
  |=====================================                                 |  52%
  |                                                                            
  |=====================================                                 |  53%
  |                                                                            
  |======================================                                |  54%
  |                                                                            
  |=======================================                               |  55%
  |                                                                            
  |=======================================                               |  56%
  |                                                                            
  |========================================                              |  57%
  |                                                                            
  |=========================================                             |  58%
  |                                                                            
  |=========================================                             |  59%
  |                                                                            
  |==========================================                            |  60%
  |                                                                            
  |===========================================                           |  61%
  |                                                                            
  |===========================================                           |  62%
  |                                                                            
  |============================================                          |  63%
  |                                                                            
  |=============================================                         |  64%
  |                                                                            
  |=============================================                         |  65%
  |                                                                            
  |==============================================                        |  66%
  |                                                                            
  |===============================================                       |  67%
  |                                                                            
  |================================================                      |  68%
  |                                                                            
  |================================================                      |  69%
  |                                                                            
  |=================================================                     |  69%
  |                                                                            
  |=================================================                     |  70%
  |                                                                            
  |==================================================                    |  71%
  |                                                                            
  |==================================================                    |  72%
  |                                                                            
  |===================================================                   |  72%
  |                                                                            
  |===================================================                   |  73%
  |                                                                            
  |====================================================                  |  74%
  |                                                                            
  |====================================================                  |  75%
  |                                                                            
  |=====================================================                 |  75%
  |                                                                            
  |=====================================================                 |  76%
  |                                                                            
  |======================================================                |  77%
  |                                                                            
  |======================================================                |  78%
  |                                                                            
  |=======================================================               |  78%
  |                                                                            
  |=======================================================               |  79%
  |                                                                            
  |========================================================              |  80%
  |                                                                            
  |========================================================              |  81%
  |                                                                            
  |=========================================================             |  81%
  |                                                                            
  |=========================================================             |  82%
  |                                                                            
  |==========================================================            |  83%
  |                                                                            
  |===========================================================           |  84%
  |                                                                            
  |============================================================          |  85%
  |                                                                            
  |============================================================          |  86%
  |                                                                            
  |=============================================================         |  87%
  |                                                                            
  |==============================================================        |  88%
  |                                                                            
  |==============================================================        |  89%
  |                                                                            
  |===============================================================       |  90%
  |                                                                            
  |================================================================      |  91%
  |                                                                            
  |================================================================      |  92%
  |                                                                            
  |=================================================================     |  93%
  |                                                                            
  |==================================================================    |  94%
  |                                                                            
  |==================================================================    |  95%
  |                                                                            
  |===================================================================   |  96%
  |                                                                            
  |====================================================================  |  97%
  |                                                                            
  |====================================================================  |  98%
  |                                                                            
  |===================================================================== |  99%
  |                                                                            
  |======================================================================| 100%
Calculating lines

Calculating nearest IDs

  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |===                                                                   |   4%
  |                                                                            
  |=====                                                                 |   7%
  |                                                                            
  |========                                                              |  11%
  |                                                                            
  |==========                                                            |  15%
  |                                                                            
  |=============                                                         |  19%
  |                                                                            
  |================                                                      |  22%
  |                                                                            
  |==================                                                    |  26%
  |                                                                            
  |=====================                                                 |  30%
  |                                                                            
  |=======================                                               |  33%
  |                                                                            
  |==========================                                            |  37%
  |                                                                            
  |=============================                                         |  41%
  |                                                                            
  |===============================                                       |  44%
  |                                                                            
  |==================================                                    |  48%
  |                                                                            
  |====================================                                  |  52%
  |                                                                            
  |=======================================                               |  56%
  |                                                                            
  |=========================================                             |  59%
  |                                                                            
  |============================================                          |  63%
  |                                                                            
  |===============================================                       |  67%
  |                                                                            
  |=================================================                     |  70%
  |                                                                            
  |====================================================                  |  74%
  |                                                                            
  |======================================================                |  78%
  |                                                                            
  |=========================================================             |  81%
  |                                                                            
  |============================================================          |  85%
  |                                                                            
  |==============================================================        |  89%
  |                                                                            
  |=================================================================     |  93%
  |                                                                            
  |===================================================================   |  96%
  |                                                                            
  |======================================================================| 100%
Calculating lines
Calculating nearest IDs

  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |=                                                                     |   1%
  |                                                                            
  |==                                                                    |   2%
  |                                                                            
  |==                                                                    |   3%
  |                                                                            
  |===                                                                   |   4%
  |                                                                            
  |====                                                                  |   5%
  |                                                                            
  |====                                                                  |   6%
  |                                                                            
  |=====                                                                 |   7%
  |                                                                            
  |======                                                                |   8%
  |                                                                            
  |======                                                                |   9%
  |                                                                            
  |=======                                                               |  10%
  |                                                                            
  |========                                                              |  11%
  |                                                                            
  |========                                                              |  12%
  |                                                                            
  |=========                                                             |  13%
  |                                                                            
  |==========                                                            |  14%
  |                                                                            
  |==========                                                            |  15%
  |                                                                            
  |===========                                                           |  16%
  |                                                                            
  |============                                                          |  17%
  |                                                                            
  |=============                                                         |  18%
  |                                                                            
  |=============                                                         |  19%
  |                                                                            
  |==============                                                        |  19%
  |                                                                            
  |==============                                                        |  20%
  |                                                                            
  |===============                                                       |  21%
  |                                                                            
  |===============                                                       |  22%
  |                                                                            
  |================                                                      |  22%
  |                                                                            
  |================                                                      |  23%
  |                                                                            
  |=================                                                     |  24%
  |                                                                            
  |=================                                                     |  25%
  |                                                                            
  |==================                                                    |  25%
  |                                                                            
  |==================                                                    |  26%
  |                                                                            
  |===================                                                   |  27%
  |                                                                            
  |===================                                                   |  28%
  |                                                                            
  |====================                                                  |  28%
  |                                                                            
  |====================                                                  |  29%
  |                                                                            
  |=====================                                                 |  30%
  |                                                                            
  |=====================                                                 |  31%
  |                                                                            
  |======================                                                |  31%
  |                                                                            
  |======================                                                |  32%
  |                                                                            
  |=======================                                               |  33%
  |                                                                            
  |========================                                              |  34%
  |                                                                            
  |=========================                                             |  35%
  |                                                                            
  |=========================                                             |  36%
  |                                                                            
  |==========================                                            |  37%
  |                                                                            
  |===========================                                           |  38%
  |                                                                            
  |===========================                                           |  39%
  |                                                                            
  |============================                                          |  40%
  |                                                                            
  |=============================                                         |  41%
  |                                                                            
  |=============================                                         |  42%
  |                                                                            
  |==============================                                        |  43%
  |                                                                            
  |===============================                                       |  44%
  |                                                                            
  |===============================                                       |  45%
  |                                                                            
  |================================                                      |  46%
  |                                                                            
  |=================================                                     |  47%
  |                                                                            
  |=================================                                     |  48%
  |                                                                            
  |==================================                                    |  49%
  |                                                                            
  |===================================                                   |  50%
  |                                                                            
  |====================================                                  |  51%
  |                                                                            
  |=====================================                                 |  52%
  |                                                                            
  |=====================================                                 |  53%
  |                                                                            
  |======================================                                |  54%
  |                                                                            
  |=======================================                               |  55%
  |                                                                            
  |=======================================                               |  56%
  |                                                                            
  |========================================                              |  57%
  |                                                                            
  |=========================================                             |  58%
  |                                                                            
  |=========================================                             |  59%
  |                                                                            
  |==========================================                            |  60%
  |                                                                            
  |===========================================                           |  61%
  |                                                                            
  |===========================================                           |  62%
  |                                                                            
  |============================================                          |  63%
  |                                                                            
  |=============================================                         |  64%
  |                                                                            
  |=============================================                         |  65%
  |                                                                            
  |==============================================                        |  66%
  |                                                                            
  |===============================================                       |  67%
  |                                                                            
  |================================================                      |  68%
  |                                                                            
  |================================================                      |  69%
  |                                                                            
  |=================================================                     |  69%
  |                                                                            
  |=================================================                     |  70%
  |                                                                            
  |==================================================                    |  71%
  |                                                                            
  |==================================================                    |  72%
  |                                                                            
  |===================================================                   |  72%
  |                                                                            
  |===================================================                   |  73%
  |                                                                            
  |====================================================                  |  74%
  |                                                                            
  |====================================================                  |  75%
  |                                                                            
  |=====================================================                 |  75%
  |                                                                            
  |=====================================================                 |  76%
  |                                                                            
  |======================================================                |  77%
  |                                                                            
  |======================================================                |  78%
  |                                                                            
  |=======================================================               |  78%
  |                                                                            
  |=======================================================               |  79%
  |                                                                            
  |========================================================              |  80%
  |                                                                            
  |========================================================              |  81%
  |                                                                            
  |=========================================================             |  81%
  |                                                                            
  |=========================================================             |  82%
  |                                                                            
  |==========================================================            |  83%
  |                                                                            
  |===========================================================           |  84%
  |                                                                            
  |============================================================          |  85%
  |                                                                            
  |============================================================          |  86%
  |                                                                            
  |=============================================================         |  87%
  |                                                                            
  |==============================================================        |  88%
  |                                                                            
  |==============================================================        |  89%
  |                                                                            
  |===============================================================       |  90%
  |                                                                            
  |================================================================      |  91%
  |                                                                            
  |================================================================      |  92%
  |                                                                            
  |=================================================================     |  93%
  |                                                                            
  |==================================================================    |  94%
  |                                                                            
  |==================================================================    |  95%
  |                                                                            
  |===================================================================   |  96%
  |                                                                            
  |====================================================================  |  97%
  |                                                                            
  |====================================================================  |  98%
  |                                                                            
  |===================================================================== |  99%
  |                                                                            
  |======================================================================| 100%
Calculating lines

Calculating nearest IDs

  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |===                                                                   |   4%
  |                                                                            
  |======                                                                |   8%
  |                                                                            
  |=========                                                             |  12%
  |                                                                            
  |============                                                          |  17%
  |                                                                            
  |===============                                                       |  21%
  |                                                                            
  |==================                                                    |  25%
  |                                                                            
  |====================                                                  |  29%
  |                                                                            
  |=======================                                               |  33%
  |                                                                            
  |==========================                                            |  38%
  |                                                                            
  |=============================                                         |  42%
  |                                                                            
  |================================                                      |  46%
  |                                                                            
  |===================================                                   |  50%
  |                                                                            
  |======================================                                |  54%
  |                                                                            
  |=========================================                             |  58%
  |                                                                            
  |============================================                          |  62%
  |                                                                            
  |===============================================                       |  67%
  |                                                                            
  |==================================================                    |  71%
  |                                                                            
  |====================================================                  |  75%
  |                                                                            
  |=======================================================               |  79%
  |                                                                            
  |==========================================================            |  83%
  |                                                                            
  |=============================================================         |  88%
  |                                                                            
  |================================================================      |  92%
  |                                                                            
  |===================================================================   |  96%
  |                                                                            
  |======================================================================| 100%
Calculating lines
Calculating nearest IDs

  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |=                                                                     |   1%
  |                                                                            
  |==                                                                    |   2%
  |                                                                            
  |==                                                                    |   3%
  |                                                                            
  |===                                                                   |   4%
  |                                                                            
  |====                                                                  |   5%
  |                                                                            
  |====                                                                  |   6%
  |                                                                            
  |=====                                                                 |   7%
  |                                                                            
  |======                                                                |   8%
  |                                                                            
  |======                                                                |   9%
  |                                                                            
  |=======                                                               |  10%
  |                                                                            
  |========                                                              |  11%
  |                                                                            
  |========                                                              |  12%
  |                                                                            
  |=========                                                             |  13%
  |                                                                            
  |==========                                                            |  14%
  |                                                                            
  |==========                                                            |  15%
  |                                                                            
  |===========                                                           |  16%
  |                                                                            
  |============                                                          |  17%
  |                                                                            
  |=============                                                         |  18%
  |                                                                            
  |=============                                                         |  19%
  |                                                                            
  |==============                                                        |  19%
  |                                                                            
  |==============                                                        |  20%
  |                                                                            
  |===============                                                       |  21%
  |                                                                            
  |===============                                                       |  22%
  |                                                                            
  |================                                                      |  22%
  |                                                                            
  |================                                                      |  23%
  |                                                                            
  |=================                                                     |  24%
  |                                                                            
  |=================                                                     |  25%
  |                                                                            
  |==================                                                    |  25%
  |                                                                            
  |==================                                                    |  26%
  |                                                                            
  |===================                                                   |  27%
  |                                                                            
  |===================                                                   |  28%
  |                                                                            
  |====================                                                  |  28%
  |                                                                            
  |====================                                                  |  29%
  |                                                                            
  |=====================                                                 |  30%
  |                                                                            
  |=====================                                                 |  31%
  |                                                                            
  |======================                                                |  31%
  |                                                                            
  |======================                                                |  32%
  |                                                                            
  |=======================                                               |  33%
  |                                                                            
  |========================                                              |  34%
  |                                                                            
  |=========================                                             |  35%
  |                                                                            
  |=========================                                             |  36%
  |                                                                            
  |==========================                                            |  37%
  |                                                                            
  |===========================                                           |  38%
  |                                                                            
  |===========================                                           |  39%
  |                                                                            
  |============================                                          |  40%
  |                                                                            
  |=============================                                         |  41%
  |                                                                            
  |=============================                                         |  42%
  |                                                                            
  |==============================                                        |  43%
  |                                                                            
  |===============================                                       |  44%
  |                                                                            
  |===============================                                       |  45%
  |                                                                            
  |================================                                      |  46%
  |                                                                            
  |=================================                                     |  47%
  |                                                                            
  |=================================                                     |  48%
  |                                                                            
  |==================================                                    |  49%
  |                                                                            
  |===================================                                   |  50%
  |                                                                            
  |====================================                                  |  51%
  |                                                                            
  |=====================================                                 |  52%
  |                                                                            
  |=====================================                                 |  53%
  |                                                                            
  |======================================                                |  54%
  |                                                                            
  |=======================================                               |  55%
  |                                                                            
  |=======================================                               |  56%
  |                                                                            
  |========================================                              |  57%
  |                                                                            
  |=========================================                             |  58%
  |                                                                            
  |=========================================                             |  59%
  |                                                                            
  |==========================================                            |  60%
  |                                                                            
  |===========================================                           |  61%
  |                                                                            
  |===========================================                           |  62%
  |                                                                            
  |============================================                          |  63%
  |                                                                            
  |=============================================                         |  64%
  |                                                                            
  |=============================================                         |  65%
  |                                                                            
  |==============================================                        |  66%
  |                                                                            
  |===============================================                       |  67%
  |                                                                            
  |================================================                      |  68%
  |                                                                            
  |================================================                      |  69%
  |                                                                            
  |=================================================                     |  69%
  |                                                                            
  |=================================================                     |  70%
  |                                                                            
  |==================================================                    |  71%
  |                                                                            
  |==================================================                    |  72%
  |                                                                            
  |===================================================                   |  72%
  |                                                                            
  |===================================================                   |  73%
  |                                                                            
  |====================================================                  |  74%
  |                                                                            
  |====================================================                  |  75%
  |                                                                            
  |=====================================================                 |  75%
  |                                                                            
  |=====================================================                 |  76%
  |                                                                            
  |======================================================                |  77%
  |                                                                            
  |======================================================                |  78%
  |                                                                            
  |=======================================================               |  78%
  |                                                                            
  |=======================================================               |  79%
  |                                                                            
  |========================================================              |  80%
  |                                                                            
  |========================================================              |  81%
  |                                                                            
  |=========================================================             |  81%
  |                                                                            
  |=========================================================             |  82%
  |                                                                            
  |==========================================================            |  83%
  |                                                                            
  |===========================================================           |  84%
  |                                                                            
  |============================================================          |  85%
  |                                                                            
  |============================================================          |  86%
  |                                                                            
  |=============================================================         |  87%
  |                                                                            
  |==============================================================        |  88%
  |                                                                            
  |==============================================================        |  89%
  |                                                                            
  |===============================================================       |  90%
  |                                                                            
  |================================================================      |  91%
  |                                                                            
  |================================================================      |  92%
  |                                                                            
  |=================================================================     |  93%
  |                                                                            
  |==================================================================    |  94%
  |                                                                            
  |==================================================================    |  95%
  |                                                                            
  |===================================================================   |  96%
  |                                                                            
  |====================================================================  |  97%
  |                                                                            
  |====================================================================  |  98%
  |                                                                            
  |===================================================================== |  99%
  |                                                                            
  |======================================================================| 100%
Calculating lines

Calculating nearest IDs

  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |===                                                                   |   4%
  |                                                                            
  |======                                                                |   8%
  |                                                                            
  |========                                                              |  12%
  |                                                                            
  |===========                                                           |  16%
  |                                                                            
  |==============                                                        |  20%
  |                                                                            
  |=================                                                     |  24%
  |                                                                            
  |====================                                                  |  28%
  |                                                                            
  |======================                                                |  32%
  |                                                                            
  |=========================                                             |  36%
  |                                                                            
  |============================                                          |  40%
  |                                                                            
  |===============================                                       |  44%
  |                                                                            
  |==================================                                    |  48%
  |                                                                            
  |====================================                                  |  52%
  |                                                                            
  |=======================================                               |  56%
  |                                                                            
  |==========================================                            |  60%
  |                                                                            
  |=============================================                         |  64%
  |                                                                            
  |================================================                      |  68%
  |                                                                            
  |==================================================                    |  72%
  |                                                                            
  |=====================================================                 |  76%
  |                                                                            
  |========================================================              |  80%
  |                                                                            
  |===========================================================           |  84%
  |                                                                            
  |==============================================================        |  88%
  |                                                                            
  |================================================================      |  92%
  |                                                                            
  |===================================================================   |  96%
  |                                                                            
  |======================================================================| 100%
Calculating lines
Calculating nearest IDs

  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |=                                                                     |   1%
  |                                                                            
  |==                                                                    |   2%
  |                                                                            
  |==                                                                    |   3%
  |                                                                            
  |===                                                                   |   4%
  |                                                                            
  |====                                                                  |   5%
  |                                                                            
  |====                                                                  |   6%
  |                                                                            
  |=====                                                                 |   7%
  |                                                                            
  |======                                                                |   8%
  |                                                                            
  |======                                                                |   9%
  |                                                                            
  |=======                                                               |  10%
  |                                                                            
  |========                                                              |  11%
  |                                                                            
  |========                                                              |  12%
  |                                                                            
  |=========                                                             |  13%
  |                                                                            
  |==========                                                            |  14%
  |                                                                            
  |==========                                                            |  15%
  |                                                                            
  |===========                                                           |  16%
  |                                                                            
  |============                                                          |  17%
  |                                                                            
  |=============                                                         |  18%
  |                                                                            
  |=============                                                         |  19%
  |                                                                            
  |==============                                                        |  19%
  |                                                                            
  |==============                                                        |  20%
  |                                                                            
  |===============                                                       |  21%
  |                                                                            
  |===============                                                       |  22%
  |                                                                            
  |================                                                      |  22%
  |                                                                            
  |================                                                      |  23%
  |                                                                            
  |=================                                                     |  24%
  |                                                                            
  |=================                                                     |  25%
  |                                                                            
  |==================                                                    |  25%
  |                                                                            
  |==================                                                    |  26%
  |                                                                            
  |===================                                                   |  27%
  |                                                                            
  |===================                                                   |  28%
  |                                                                            
  |====================                                                  |  28%
  |                                                                            
  |====================                                                  |  29%
  |                                                                            
  |=====================                                                 |  30%
  |                                                                            
  |=====================                                                 |  31%
  |                                                                            
  |======================                                                |  31%
  |                                                                            
  |======================                                                |  32%
  |                                                                            
  |=======================                                               |  33%
  |                                                                            
  |========================                                              |  34%
  |                                                                            
  |=========================                                             |  35%
  |                                                                            
  |=========================                                             |  36%
  |                                                                            
  |==========================                                            |  37%
  |                                                                            
  |===========================                                           |  38%
  |                                                                            
  |===========================                                           |  39%
  |                                                                            
  |============================                                          |  40%
  |                                                                            
  |=============================                                         |  41%
  |                                                                            
  |=============================                                         |  42%
  |                                                                            
  |==============================                                        |  43%
  |                                                                            
  |===============================                                       |  44%
  |                                                                            
  |===============================                                       |  45%
  |                                                                            
  |================================                                      |  46%
  |                                                                            
  |=================================                                     |  47%
  |                                                                            
  |=================================                                     |  48%
  |                                                                            
  |==================================                                    |  49%
  |                                                                            
  |===================================                                   |  50%
  |                                                                            
  |====================================                                  |  51%
  |                                                                            
  |=====================================                                 |  52%
  |                                                                            
  |=====================================                                 |  53%
  |                                                                            
  |======================================                                |  54%
  |                                                                            
  |=======================================                               |  55%
  |                                                                            
  |=======================================                               |  56%
  |                                                                            
  |========================================                              |  57%
  |                                                                            
  |=========================================                             |  58%
  |                                                                            
  |=========================================                             |  59%
  |                                                                            
  |==========================================                            |  60%
  |                                                                            
  |===========================================                           |  61%
  |                                                                            
  |===========================================                           |  62%
  |                                                                            
  |============================================                          |  63%
  |                                                                            
  |=============================================                         |  64%
  |                                                                            
  |=============================================                         |  65%
  |                                                                            
  |==============================================                        |  66%
  |                                                                            
  |===============================================                       |  67%
  |                                                                            
  |================================================                      |  68%
  |                                                                            
  |================================================                      |  69%
  |                                                                            
  |=================================================                     |  69%
  |                                                                            
  |=================================================                     |  70%
  |                                                                            
  |==================================================                    |  71%
  |                                                                            
  |==================================================                    |  72%
  |                                                                            
  |===================================================                   |  72%
  |                                                                            
  |===================================================                   |  73%
  |                                                                            
  |====================================================                  |  74%
  |                                                                            
  |====================================================                  |  75%
  |                                                                            
  |=====================================================                 |  75%
  |                                                                            
  |=====================================================                 |  76%
  |                                                                            
  |======================================================                |  77%
  |                                                                            
  |======================================================                |  78%
  |                                                                            
  |=======================================================               |  78%
  |                                                                            
  |=======================================================               |  79%
  |                                                                            
  |========================================================              |  80%
  |                                                                            
  |========================================================              |  81%
  |                                                                            
  |=========================================================             |  81%
  |                                                                            
  |=========================================================             |  82%
  |                                                                            
  |==========================================================            |  83%
  |                                                                            
  |===========================================================           |  84%
  |                                                                            
  |============================================================          |  85%
  |                                                                            
  |============================================================          |  86%
  |                                                                            
  |=============================================================         |  87%
  |                                                                            
  |==============================================================        |  88%
  |                                                                            
  |==============================================================        |  89%
  |                                                                            
  |===============================================================       |  90%
  |                                                                            
  |================================================================      |  91%
  |                                                                            
  |================================================================      |  92%
  |                                                                            
  |=================================================================     |  93%
  |                                                                            
  |==================================================================    |  94%
  |                                                                            
  |==================================================================    |  95%
  |                                                                            
  |===================================================================   |  96%
  |                                                                            
  |====================================================================  |  97%
  |                                                                            
  |====================================================================  |  98%
  |                                                                            
  |===================================================================== |  99%
  |                                                                            
  |======================================================================| 100%
Calculating lines

oxy_without_key_trim_join <- bind_rows(lis)

# Join these data into the trawl data by haul_id
d_without_key <- d %>% filter(q_year %in% c(unique(oxy_without_key$q_year)))

d_without_key <- d_without_key %>% left_join(oxy_without_key_trim_join, by = "haul_id")

# Now, I don't have any missing environmental variables because I join by nearest... but that means distances can be waaay off
# Removing values further away than... 5 km... ?! That is 20% of the data
hist(d_without_key$dist_m)

d_without_key <- d_without_key %>% 
  mutate(oxy = ifelse(dist_m > 5000, NA, oxy),
         temp = ifelse(dist_m > 5000, NA, temp),
         depth = ifelse(dist_m > 5000, NA, depth))

# Which ones are missing? Pretty spread out. Do not suspect a systematic error here
d_without_key %>% 
  filter(is.na(oxy))
        lon      lat        X        Y year quarter smhi_serial_no    haul_id
1  17.01667 57.40000 621.1803 6363.710 2020       4            704 2020_4_213
2  19.16667 57.60000 748.9352 6391.826 2020       4            710 2020_4_218
3  13.60000 55.20000 410.8932 6117.942 2020       4            730 2020_4_237
4  13.91667 55.28333 431.1914 6126.856 2020       4             NA 2020_4_238
5  13.95000 55.25000 433.2526 6123.114 2020       4             NA 2020_4_239
6  16.06667 55.86667 566.7531 6191.754 2021       1            194  2021_1_54
7  13.71667 55.26667 418.4548 6125.217 2021       1             NA  2021_1_60
8  13.91667 55.28333 431.1914 6126.856 2021       1             NA  2021_1_61
9  13.98333 55.25000 435.3715 6123.083 2021       1             NA  2021_1_62
10 14.46667 55.45000 466.2665 6144.998 2021       1            203  2021_1_63
11 15.95000 55.78333 559.5795 6182.374 2021       1            195  2021_1_55
12 14.68333 55.63333 480.0637 6165.317 2021       1            204  2021_1_64
13 14.85000 55.68333 490.5685 6170.847 2021       1            205  2021_1_65
14 13.30000 55.16667 391.7102 6114.657 2021       4            966 2021_4_224
15 14.58333 55.60000 473.7457 6161.641 2021       4            970 2021_4_228
16 14.36667 55.70000 460.1953 6172.873 2022       1            170  2022_1_50
17 14.45000 55.66667 465.4032 6169.119 2022       1            175  2022_1_55
18 14.73333 55.66667 483.2257 6169.014 2022       1             NA  2022_1_56
19 15.91667 55.78333 557.4890 6182.346 2022       1            177  2022_1_57
20 16.91667 57.35000 615.3292 6357.972 2022       4             NA 2022_4_240
21 13.63333 55.20000 413.0146 6117.900 2022       4            987 2022_4_249
22 13.91667 55.28333 431.1914 6126.856 2022       4             NA 2022_4_251
23 15.90000 55.78333 556.4438 6182.332 2022       4             NA 2022_4_245
24 15.56667 55.90000 535.4327 6195.095 2022       4            991 2022_4_253
25 15.55000 55.83333 534.4496 6187.667 2022       4             NA 2022_4_254
26 15.26667 55.81667 516.7100 6185.708 2022       4             NA 2022_4_255
   bottom_depth q_year.x smhi_serial_no_all oxy depth temp q_year.y    dist_m
1           552   4_2020         4_2020_704  NA    NA   NA   4_2020 54285.507
2           465   4_2020         4_2020_710  NA    NA   NA   4_2020 49514.482
3           433   4_2020         4_2020_730  NA    NA   NA   4_2020 46388.757
4           371   4_2020          4_2020_NA  NA    NA   NA   4_2020 14921.230
5           381   4_2020          4_2020_NA  NA    NA   NA   4_2020  6327.592
6           551   1_2021         1_2021_194  NA    NA   NA   1_2021  9970.532
7           404   1_2021          1_2021_NA  NA    NA   NA   1_2021  7421.875
8           366   1_2021          1_2021_NA  NA    NA   NA   1_2021  6354.334
9           373   1_2021          1_2021_NA  NA    NA   NA   1_2021  6326.300
10          547   1_2021         1_2021_203  NA    NA   NA   1_2021  6290.197
11          612   1_2021         1_2021_195  NA    NA   NA   1_2021  5635.703
12          611   1_2021         1_2021_204  NA    NA   NA   1_2021  5272.994
13           53   1_2021         1_2021_205  NA    NA   NA   1_2021  5226.216
14          418   4_2021         4_2021_966  NA    NA   NA   4_2021 10642.698
15          659   4_2021         4_2021_970  NA    NA   NA   4_2021  9550.928
16          395   1_2022         1_2022_170  NA    NA   NA   1_2022 12077.134
17          502   1_2022         1_2022_175  NA    NA   NA   1_2022  9334.529
18          552   1_2022          1_2022_NA  NA    NA   NA   1_2022  7856.897
19          613   1_2022         1_2022_177  NA    NA   NA   1_2022  5938.855
20          445   4_2022          4_2022_NA  NA    NA   NA   4_2022 10645.092
21          434   4_2022         4_2022_987  NA    NA   NA   4_2022  7247.593
22          373   4_2022          4_2022_NA  NA    NA   NA   4_2022  5933.495
23          601   4_2022          4_2022_NA  NA    NA   NA   4_2022  5663.951
24           50   4_2022         4_2022_991  NA    NA   NA   4_2022  5373.542
25          399   4_2022          4_2022_NA  NA    NA   NA   4_2022  5272.994
26          517   4_2022          4_2022_NA  NA    NA   NA   4_2022  5232.911
# Next we can check how close to the bottom our average is. Before doing that, check depth again.
d_without_key %>% 
  mutate(depth2 = ifelse(bottom_depth > 250, "deeper than 250 m", "normal")) %>% 
  ggplot(aes(lon, lat, color = as.factor(year), shape = as.factor(quarter))) +
  geom_point() + 
  facet_wrap(~ depth2, ncol = 1) + 
  coord_sf()

d_without_key %>% 
  mutate(depth2 = ifelse(bottom_depth > 250, "deeper than 250 m", "normal")) %>% 
  group_by(year, depth2) %>% 
  summarise(n = n()) %>% 
  ggplot(aes(year, n, fill = depth2)) + 
  geom_bar(stat = "identity")

# I am going to assume that if depth is deeper than 250, I divide by 10, because in script 01, this gives a very good fit to raster-derived depth data
d_without_key %>% 
  drop_na(depth) %>% # These are the ones without any depth info
  mutate(bottom_depth2 = ifelse(bottom_depth > 250, bottom_depth/10, bottom_depth),
         depth_diff = bottom_depth2 - depth) %>% 
  ggplot(aes(smhi_serial_no_all, depth_diff)) + 
  geom_point()

# Interesting... Here it seems that the depth difference is more spread around 0, as we would expect.

Join and save these data:

all_combined_ctd_trawl_data <- bind_rows(d_with_key %>% select(oxy, depth, temp, haul_id),
                                         d_without_key %>% select(oxy, depth, temp, haul_id)) %>% 
  drop_na(oxy, temp)


str(all_combined_ctd_trawl_data)
'data.frame':   326 obs. of  4 variables:
 $ oxy    : num  2.3 3.55 4.46 5.45 6.02 ...
 $ depth  : num  58 47.6 37.7 38.1 43.1 ...
 $ temp   : num  9.29 11.19 10.76 7.77 5.66 ...
 $ haul_id: chr  "2015_4_2" "2015_4_4" "2015_4_6" "2015_4_8" ...
write_csv(all_combined_ctd_trawl_data, paste0(home, "/data/clean/ctd_trawl_joined.csv"))

# Final check on how many stomachs and hauls that can't be matched
d_all <- read_csv(paste0(home, "/data/clean/aggregated_stomach_data.csv"))
  
nrow(d_all)
[1] 9978
d_all <- d_all %>% left_join(all_combined_ctd_trawl_data, by = "haul_id")

names(d_all)
 [1] "pred_id"                "amphipoda_tot"          "bivalvia_tot"          
 [4] "clupeidae_tot"          "clupea_harengus_tot"    "gadus_morhua_tot"      
 [7] "gobiidae_tot"           "mysidae_tot"            "non_bio_tot"           
[10] "other_crustacea_tot"    "other_tot"              "other_pisces_tot"      
[13] "platichthys_flesus_tot" "polychaeta_tot"         "saduria_entomon_tot"   
[16] "sprattus_sprattus_tot"  "predator_latin_name"    "species"               
[19] "pred_weight_g"          "pred_length_cm"         "year"                  
[22] "quarter"                "month"                  "ices_rect"             
[25] "subdiv"                 "haul_id"                "smhi_serial_no"        
[28] "X"                      "Y"                      "lat"                   
[31] "lon"                    "fle_kg_km2"             "mcod_kg_km2"           
[34] "scod_kg_km2"            "date"                   "bottom_depth"          
[37] "group"                  "depth.x"                "density_saduria"       
[40] "oxy.x"                  "oxy.y"                  "depth.y"               
[43] "temp"                  
# 13 % of all stomachs
d_all %>% drop_na(oxy.y)
# A tibble: 8,943 × 43
   pred_id        amphipoda_tot bivalvia_tot clupeidae_tot clupea_harengus_tot
   <chr>                  <dbl>        <dbl>         <dbl>               <dbl>
 1 2015_4_COD_1            0               0          0                      0
 2 2015_4_COD_101          0               0          0                      0
 3 2015_4_COD_103          0               0          0                      0
 4 2015_4_COD_104          0               0          0                      0
 5 2015_4_COD_106          0               0          0                      0
 6 2015_4_COD_108          0               0          0                      0
 7 2015_4_COD_11           0               0          0                      0
 8 2015_4_COD_111          0               0          1.74                   0
 9 2015_4_COD_113          0               0          0                      0
10 2015_4_COD_114          0.04            0          0                      0
# ℹ 8,933 more rows
# ℹ 38 more variables: gadus_morhua_tot <dbl>, gobiidae_tot <dbl>,
#   mysidae_tot <dbl>, non_bio_tot <dbl>, other_crustacea_tot <dbl>,
#   other_tot <dbl>, other_pisces_tot <dbl>, platichthys_flesus_tot <dbl>,
#   polychaeta_tot <dbl>, saduria_entomon_tot <dbl>,
#   sprattus_sprattus_tot <dbl>, predator_latin_name <chr>, species <chr>,
#   pred_weight_g <dbl>, pred_length_cm <dbl>, year <dbl>, quarter <dbl>, …
# 9% % of all hauls
d_all %>% distinct(haul_id, .keep_all = TRUE) %>% drop_na(oxy.y)
# A tibble: 322 × 43
   pred_id        amphipoda_tot bivalvia_tot clupeidae_tot clupea_harengus_tot
   <chr>                  <dbl>        <dbl>         <dbl>               <dbl>
 1 2015_4_COD_1               0            0             0                0   
 2 2015_4_COD_101             0            0             0                0   
 3 2015_4_COD_138             0            0             0                0   
 4 2015_4_COD_169             0            0             0                8.22
 5 2015_4_COD_249             0            0             0                0   
 6 2015_4_COD_335             0            0             0                0   
 7 2015_4_COD_342             0            0             0                0   
 8 2015_4_COD_379             0            0             0                0   
 9 2015_4_COD_410             0            0             0                0   
10 2015_4_COD_440             0            0             0                0   
# ℹ 312 more rows
# ℹ 38 more variables: gadus_morhua_tot <dbl>, gobiidae_tot <dbl>,
#   mysidae_tot <dbl>, non_bio_tot <dbl>, other_crustacea_tot <dbl>,
#   other_tot <dbl>, other_pisces_tot <dbl>, platichthys_flesus_tot <dbl>,
#   polychaeta_tot <dbl>, saduria_entomon_tot <dbl>,
#   sprattus_sprattus_tot <dbl>, predator_latin_name <chr>, species <chr>,
#   pred_weight_g <dbl>, pred_length_cm <dbl>, year <dbl>, quarter <dbl>, …

Final conclusions:

  1. Not all trawl data have a CTD measurement, we’d need to discard stomach data (8% of hauls minimum, likely more), and some CTD measurements are nonsens
  2. It is extremely time-consuming to prepare the data, essentially it needs to be done by each survey, and sometimes within each file (station) within a survey
  3. At which depths do we summarise oxygen data? We cannot no for sure it’s the bottom, and the data from the trawl and the CTD make it very clear the deepest recording is not always the bottom
  4. How far from trawl locaiton is CTD ok to use?